From b56754606bee9b5504ea89f912007fb8e75ad936 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 9 Jan 2013 00:34:07 -0500 Subject: [PATCH 01/25] Initial break-out of GenotypeConcordance as a standalone walker. Some basic functionality testing. Currently performs only a pairwise comparison, but is very careful about proper tabulation through the GenotypeType enum. --- .../variantutils/ConcordanceMetrics.java | 252 +++++++++++++ .../variantutils/GenotypeConcordance.java | 211 +++++++++++ .../ConcordanceMetricsUnitTest.java | 339 ++++++++++++++++++ 3 files changed, 802 insertions(+) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java new file mode 100644 index 000000000..648df327d --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -0,0 +1,252 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFHeader; + +import java.util.*; + +/** + * A class for tabulating and evaluating a callset-by-callset genotype concordance table + * */ +public class ConcordanceMetrics { + + private Map perSampleGenotypeConcordance; + private GenotypeConcordanceTable overallGenotypeConcordance; + private SiteConcordanceTable overallSiteConcordance; + + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) { + HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); + overlappingSamples.retainAll(truth.getGenotypeSamples()); + perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); + for ( String sample : overlappingSamples ) { + perSampleGenotypeConcordance.put(sample,new GenotypeConcordanceTable()); + } + overallGenotypeConcordance = new GenotypeConcordanceTable(); + overallSiteConcordance = new SiteConcordanceTable(); + } + + public GenotypeConcordanceTable getOverallGenotypeConcordance() { + return overallGenotypeConcordance; + } + + public SiteConcordanceTable getOverallSiteConcordance() { + return overallSiteConcordance; + } + + public GenotypeConcordanceTable getGenotypeConcordance(String sample) { + GenotypeConcordanceTable table = perSampleGenotypeConcordance.get(sample); + if ( table == null ) + throw new ReviewedStingException("Attempted to request the concordance table for a sample on which it was not calculated"); + return table; + } + + public Map getPerSampleGenotypeConcordance() { + return Collections.unmodifiableMap(perSampleGenotypeConcordance); + } + + public Map getPerSampleNRD() { + Map nrd = new HashMap(perSampleGenotypeConcordance.size()); + for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { + nrd.put(sampleTable.getKey(),calculateNRD(sampleTable.getValue())); + } + + return Collections.unmodifiableMap(nrd); + } + + public Double getOverallNRD() { + return calculateNRD(overallGenotypeConcordance); + } + + public Map getPerSampleNRS() { + Map nrs = new HashMap(perSampleGenotypeConcordance.size()); + for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { + nrs.put(sampleTable.getKey(),calculateNRS(sampleTable.getValue())); + } + + return Collections.unmodifiableMap(nrs); + } + + public Double getOverallNRS() { + return calculateNRS(overallGenotypeConcordance); + } + + @Requires({"eval != null","truth != null"}) + public void update(VariantContext eval, VariantContext truth) { + overallSiteConcordance.update(eval,truth); + Set alleleTruth = new HashSet(8); + alleleTruth.add(truth.getReference().getBaseString()); + for ( Allele a : truth.getAlternateAlleles() ) { + alleleTruth.add(a.getBaseString()); + } + for ( String sample : perSampleGenotypeConcordance.keySet() ) { + Genotype evalGenotype = eval.getGenotype(sample); + Genotype truthGenotype = truth.getGenotype(sample); + perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth); + overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth); + } + } + + private static double calculateNRD(GenotypeConcordanceTable table) { + return calculateNRD(table.getTable()); + } + + private static double calculateNRD(int[][] concordanceCounts) { + int correct = 0; + int total = 0; + correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()]; + correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += correct; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; + // NRD is by definition incorrec/total = 1.0-correct/total + return 1.0 - ( (double) correct)/( (double) total); + } + + private static double calculateNRS(GenotypeConcordanceTable table) { + return calculateNRS(table.getTable()); + } + + private static double calculateNRS(int[][] concordanceCounts) { + long confirmedVariant = 0; + long unconfirmedVariant = 0; + for ( GenotypeType truthState : Arrays.asList(GenotypeType.HET,GenotypeType.HOM_VAR) ) { + for ( GenotypeType evalState : GenotypeType.values() ) { + if ( evalState == GenotypeType.MIXED ) + continue; + if ( evalState.equals(GenotypeType.HET) || evalState.equals(GenotypeType.HOM_VAR) ) + confirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; + else + unconfirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; + } + } + + return ( (double) confirmedVariant ) / ( (double) ( confirmedVariant + unconfirmedVariant ) ); + } + + + class GenotypeConcordanceTable { + + private int[][] genotypeCounts; + private int nMismatchingAlt; + + public GenotypeConcordanceTable() { + genotypeCounts = new int[GenotypeType.values().length][GenotypeType.values().length]; + nMismatchingAlt = 0; + } + + @Requires({"eval!=null","truth != null","truthAlleles != null"}) + public void update(Genotype eval, Genotype truth, Set truthAlleles) { + // this is slow but correct + boolean matchingAlt = true; + if ( eval.isCalled() && truth.isCalled() ) { + // by default, no-calls "match" between alleles, so if + // one or both sites are no-call or unavailable, the alt alleles match + // otherwise, check explicitly: if the eval has an allele that's not ref, no-call, or present in truth + // the alt allele is mismatching - regardless of whether the genotype is correct. + for ( Allele evalAllele : eval.getAlleles() ) { + matchingAlt &= truthAlleles.contains(evalAllele.getBaseString()); + } + } + + if ( matchingAlt ) { + genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++; + } else { + nMismatchingAlt++; + } + } + + public int[][] getTable() { + return genotypeCounts; + } + + public int getnMismatchingAlt() { + return nMismatchingAlt; + } + + public int getnEvalGenotypes() { + int nGeno = 0; + for ( GenotypeType evalType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { + for ( GenotypeType compType : GenotypeType.values() ) { + nGeno += genotypeCounts[evalType.ordinal()][compType.ordinal()]; + } + } + + return nGeno + nMismatchingAlt; + } + + public int getnCompGenotypes() { + int nGeno = 0; + for ( GenotypeType compType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { + for ( GenotypeType evalType : GenotypeType.values() ) { + nGeno += genotypeCounts[evalType.ordinal()][compType.ordinal()]; + } + } + return nGeno; + } + + public int get(GenotypeType evalType, GenotypeType compType) { + return genotypeCounts[evalType.ordinal()][compType.ordinal()]; + } + } + + class SiteConcordanceTable { + + private int[] siteConcordance; + + public SiteConcordanceTable() { + siteConcordance = new int[SiteConcordanceType.values().length]; + } + + public void update(VariantContext evalVC, VariantContext truthVC) { + SiteConcordanceType matchType = getMatchType(evalVC,truthVC); + siteConcordance[matchType.ordinal()]++; + } + + @Requires({"evalVC != null","truthVC != null"}) + private SiteConcordanceType getMatchType(VariantContext evalVC, VariantContext truthVC) { + if ( evalVC.isMonomorphicInSamples() ) + return SiteConcordanceType.TRUTH_ONLY; + if ( truthVC.isMonomorphicInSamples() ) + return SiteConcordanceType.EVAL_ONLY; + + boolean evalSusbsetTruth = VariantContextUtils.allelesAreSubset(evalVC,truthVC); + boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truthVC,evalVC); + + if ( evalSusbsetTruth && truthSubsetEval ) + return SiteConcordanceType.ALLELES_MATCH; + else if ( evalSusbsetTruth ) + return SiteConcordanceType.EVAL_SUBSET_TRUTH; + else if ( truthSubsetEval ) + return SiteConcordanceType.EVAL_SUPERSET_TRUTH; + + return SiteConcordanceType.ALLELES_DO_NOT_MATCH; + } + + public int[] getSiteConcordance() { + return siteConcordance; + } + + public int get(SiteConcordanceType type) { + return getSiteConcordance()[type.ordinal()]; + } + } + + enum SiteConcordanceType { + ALLELES_MATCH, + EVAL_SUPERSET_TRUTH, + EVAL_SUBSET_TRUTH, + ALLELES_DO_NOT_MATCH, + EVAL_ONLY, + TRUTH_ONLY + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java new file mode 100644 index 000000000..10d9a79ea --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -0,0 +1,211 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFUtils; + +import java.io.PrintStream; +import java.util.*; + +/** + * A simple walker for performing genotype concordance calculations between two callsets + */ +public class GenotypeConcordance extends RodWalker,ConcordanceMetrics> { + + @Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true) + RodBinding evalBinding; + + @Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true) + RodBinding compBinding; + + @Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false) + boolean ignoreFilters = false; + + @Output + PrintStream out; + + List evalSamples; + List compSamples; + + public ConcordanceMetrics reduceInit() { + Map headerMap = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding,compBinding)); + VCFHeader evalHeader = headerMap.get(evalBinding.getName()); + evalSamples = evalHeader.getGenotypeSamples(); + VCFHeader compHeader = headerMap.get(compBinding.getName()); + compSamples = compHeader.getGenotypeSamples(); + return new ConcordanceMetrics(evalHeader,compHeader); + } + + + public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + Pair evalCompPair = null; + if ( tracker != null && ( + tracker.getValues(evalBinding,ref.getLocus()).size() > 0 || + tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) { + + List eval = tracker.getValues(evalBinding,ref.getLocus()); + List comp = tracker.getValues(compBinding,ref.getLocus()); + if ( eval.size() > 1 || comp.size() > 1 ) { + logger.warn("Eval or Comp Rod at position "+ref.getLocus().toString()+" has multiple records. Site will be skipped."); + return evalCompPair; + } + // if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct. + // note that if there is no eval rod there must be a comp rod, and also the reverse + VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(ref,comp.get(0),evalSamples); + VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(ref,eval.get(0),compSamples); + evalContext = filterGenotypes(evalContext,ignoreFilters); + compContext = filterGenotypes(compContext,ignoreFilters); + evalCompPair = new Pair(evalContext,compContext); + } + + return evalCompPair; + } + + public ConcordanceMetrics reduce(Pair evalComp, ConcordanceMetrics metrics) { + if ( evalComp != null ) + metrics.update(evalComp.getFirst(),evalComp.getSecond()); + return metrics; + } + + public void onTraversalDone(ConcordanceMetrics metrics) { + GATKReport report = new GATKReport(); + GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2); + GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length); + concordanceCompProportions.addColumn("Sample","%s"); + concordanceCounts.addColumn("Sample","%s"); + concordanceEvalProportions.addColumn("Sample","%s"); + concordanceSummary.addColumn("Sample","%s"); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s", evalType.toString(), compType.toString()); + concordanceCounts.addColumn(colKey,"%d"); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.addColumn(colKey,"%.3f"); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.addColumn(colKey,"%.3f"); + } + } + concordanceEvalProportions.addColumn("Mismatching_Alleles","%.3f"); + concordanceCompProportions.addColumn("Mismatching_Alleles","%.3f"); + concordanceCounts.addColumn("Mismatching_Alleles","%d"); + concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f"); + concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f"); + for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { + siteConcordance.addColumn(type.toString(),"%d"); + } + + for ( Map.Entry entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) { + ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue(); + concordanceEvalProportions.set(entry.getKey(),"Sample",entry.getKey()); + concordanceCompProportions.set(entry.getKey(),"Sample",entry.getKey()); + concordanceCounts.set(entry.getKey(),"Sample",entry.getKey()); + int nEval = table.getnEvalGenotypes(); + int nComp = table.getnCompGenotypes(); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s",evalType.toString(),compType.toString()); + int count = table.get(evalType, compType); + concordanceCounts.set(entry.getKey(),colKey,count); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.set(entry.getKey(),colKey,( (double) count)/nEval); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.set(entry.getKey(),colKey,( (double) count)/nComp); + } + } + concordanceEvalProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/nEval); + concordanceCompProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/nComp); + concordanceCounts.set(entry.getKey(),"Mismatching_Alleles",table.getnMismatchingAlt()); + } + + String rowKey = "ALL"; + concordanceCompProportions.set(rowKey,"Sample",rowKey); + concordanceEvalProportions.set(rowKey,"Sample",rowKey); + concordanceCounts.set(rowKey,"Sample",rowKey); + ConcordanceMetrics.GenotypeConcordanceTable table = metrics.getOverallGenotypeConcordance(); + int nEval = table.getnEvalGenotypes(); + int nComp = table.getnCompGenotypes(); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s",evalType.toString(),compType.toString()); + int count = table.get(evalType,compType); + concordanceCounts.set(rowKey,colKey,count); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.set(rowKey,colKey,( (double) count)/nEval); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.set(rowKey,colKey,( (double) count)/nComp); + } + } + concordanceEvalProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/nEval); + concordanceCompProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/nComp); + concordanceCounts.set(rowKey,"Mismatching_Alleles",table.getnMismatchingAlt()); + + for ( Map.Entry nrsEntry : metrics.getPerSampleNRS().entrySet() ) { + concordanceSummary.set(nrsEntry.getKey(),"Sample",nrsEntry.getKey()); + concordanceSummary.set(nrsEntry.getKey(),"Non-Reference Sensitivity",nrsEntry.getValue()); + } + for ( Map.Entry nrdEntry : metrics.getPerSampleNRD().entrySet() ) { + concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue()); + } + concordanceSummary.set("ALL","Sample","ALL"); + concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS()); + concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD()); + + for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { + siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type)); + } + + report.addTable(concordanceCompProportions); + report.addTable(concordanceEvalProportions); + report.addTable(concordanceCounts); + report.addTable(concordanceSummary); + report.addTable(siteConcordance); + + report.print(out); + } + + public VariantContext createEmptyContext(ReferenceContext ref, VariantContext other, List samples) { + VariantContextBuilder builder = new VariantContextBuilder(); + // set the alleles to be the same + builder.alleles(other.getAlleles()); + builder.loc(other.getChr(),other.getStart(),other.getEnd()); + // set all genotypes to empty + List genotypes = new ArrayList(samples.size()); + for ( String sample : samples ) + genotypes.add(GenotypeBuilder.create(sample, new ArrayList(0))); + builder.genotypes(genotypes); + return builder.make(); + } + + public VariantContext filterGenotypes(VariantContext context, boolean ignoreSiteFilter) { + // placeholder method for genotype-level filtering. However if the site itself is filtered, + // and such filters are not ignored, the genotype-level data should be altered to reflect this + if ( ! context.isFiltered() || ignoreSiteFilter ) { + // todo -- add genotype-level jexl filtering here + return context; + } + VariantContextBuilder builder = new VariantContextBuilder(); + builder.alleles(Arrays.asList(context.getReference())); + builder.loc(context.getChr(),context.getStart(),context.getEnd()); + List newGeno = new ArrayList(context.getNSamples()); + for ( Genotype g : context.getGenotypes().iterateInSampleNameOrder() ) { + newGeno.add(GenotypeBuilder.create(g.getSampleName(),new ArrayList())); + } + builder.genotypes(newGeno); + return builder.make(); + } +} \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java new file mode 100644 index 000000000..9a5b18735 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -0,0 +1,339 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.GenotypeType; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.testng.annotations.Test; +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; +import org.broadinstitute.variant.vcf.*; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.StringBufferInputStream; +import java.util.ArrayList; +import java.util.Set; +import java.util.Arrays; +import java.util.List; +import net.sf.picard.reference.ReferenceSequenceFile; + +public class ConcordanceMetricsUnitTest extends BaseTest { + + private static ReferenceSequenceFile seq; + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + } + public static String HEADER_BASE = "##fileformat=VCFv4.0\n" + + "##filedate=2010-06-21\n"+ + "##reference=NCBI36\n"+ + "##INFO=\n"+ + "##INFO=\n"+ + "##INFO=\n"+ + "##FILTER=\n"+ + "##FORMAT=\n"+ + "##FORMAT=\n"+ + "##FORMAT=\n" + + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t"; + public static String TEST_1_HEADER = HEADER_BASE + "test1_sample1\ttest1_sample2\ttest1_sample3\n"; + + + private Pair getData1() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testSimpleComparison() { + Pair data = getData1(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); + Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1); + } + + private Pair getData2() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_T)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testMismatchingAllele() { + Pair data = getData2(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); + Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],0); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0); + } + + private Pair getData3() { + + Allele reference_ACT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C,BaseUtils.T},true); + Allele alt_AC = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C}); + Allele alt_A = Allele.create(BaseUtils.A); + Allele alt_ATT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.T,BaseUtils.T}); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_ATT)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_AC)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 5); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_ACT,alt_ATT,alt_A)); + truth_1_builder.alleles(Arrays.asList(reference_ACT,alt_AC,alt_A)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testComplex() { + Pair data = getData3(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[3][3],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[1][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][2],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],0); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0); + } + + private Pair getData4() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testNoCalls() { + Pair data = getData4(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],1); + } + + private Pair getData5() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", new ArrayList(0)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", new ArrayList(0)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testMissing() { + Pair data = getData5(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE)); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[4][2],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1); + } + + @Test(enabled=true) + public void testNRD_testNRS() { + Pair data = getData3(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + int[][] table = metrics.getOverallGenotypeConcordance().getTable(); + // set up the table + table[0] = new int[] {30, 12, 7, 5, 6, 0}; + table[1] = new int[] {10, 100, 5, 1, 7, 1}; + table[2] = new int[] {5, 7, 150, 3, 3, 1}; + table[3] = new int[] {3, 2, 6, 50, 1, 0}; + table[4] = new int[] {10, 6, 3, 3, 2, 0}; + table[5] = new int[] {12, 0, 34, 20, 10, 0}; + double EXPEC_NRS = 0.8969957; + double EXPEC_NRD = 0.1071429; + Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7); + Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7); + } +} \ No newline at end of file From ad7c2a08d42ef2bbf11e3c29ef794e11d26fd1df Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 9 Jan 2013 09:12:41 -0500 Subject: [PATCH 02/25] Normalize by the event type counts, not the total genotype counts: more useful normalization. --- .../variantutils/ConcordanceMetrics.java | 26 +++++++++++++------ .../variantutils/GenotypeConcordance.java | 20 ++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 648df327d..ac6752277 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -173,23 +173,33 @@ public class ConcordanceMetrics { return nMismatchingAlt; } - public int getnEvalGenotypes() { + public int getnEvalGenotypes(GenotypeType type) { + int nGeno = 0; + for ( GenotypeType comptype : GenotypeType.values() ) + nGeno += genotypeCounts[type.ordinal()][comptype.ordinal()]; + return nGeno; + } + + public int getnCalledEvalGenotypes() { int nGeno = 0; for ( GenotypeType evalType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { - for ( GenotypeType compType : GenotypeType.values() ) { - nGeno += genotypeCounts[evalType.ordinal()][compType.ordinal()]; - } + nGeno += getnEvalGenotypes(evalType); } return nGeno + nMismatchingAlt; } - public int getnCompGenotypes() { + public int getnCompGenotypes(GenotypeType type) { + int nGeno = 0; + for ( GenotypeType evaltype : GenotypeType.values() ) + nGeno += genotypeCounts[evaltype.ordinal()][type.ordinal()]; + return nGeno; + } + + public int getnCalledCompGenotypes() { int nGeno = 0; for ( GenotypeType compType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { - for ( GenotypeType evalType : GenotypeType.values() ) { - nGeno += genotypeCounts[evalType.ordinal()][compType.ordinal()]; - } + nGeno += getnCompGenotypes(compType); } return nGeno; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 10d9a79ea..7991e3432 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -114,21 +114,19 @@ public class GenotypeConcordance extends RodWalker nrsEntry : metrics.getPerSampleNRS().entrySet() ) { From c87ad8c0ef9868a86463e58343322261b0e4fe77 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 10:00:46 -0500 Subject: [PATCH 03/25] Bug fixes related to HC's GGA mode. Tracking just the artificial allele isn't sufficient when there are multiple GGA records that change the reference basis. Also, duplicated records screw up the tracking of merged alleles. --- .../haplotypecaller/GenotypingEngine.java | 33 +++++++++----- .../SimpleDeBruijnAssembler.java | 5 ++- .../broadinstitute/sting/utils/Haplotype.java | 43 +++++++++++++------ 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 0c7e43525..7c25ab551 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -115,19 +115,31 @@ public class GenotypingEngine { } } } else { // we are in GGA mode! + int compCount = 0; for( final VariantContext compVC : activeAllelesToGenotype ) { if( compVC.getStart() == loc ) { - priorityList.clear(); int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { ArrayList alleleSet = new ArrayList(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); - priorityList.add("Allele" + alleleCount); - eventsAtThisLoc.add(new VariantContextBuilder(compVC).alleles(alleleSet).source("Allele"+alleleCount).make()); + final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; + // check if this event is already in the list of events due to a repeat in the input alleles track + final VariantContext candidateEventToAdd = new VariantContextBuilder(compVC).alleles(alleleSet).source(vcSourceName).make(); + boolean alreadyExists = false; + for( final VariantContext eventToTest : eventsAtThisLoc ) { + if( eventToTest.hasSameAllelesAs(candidateEventToAdd) ) { + alreadyExists = true; + } + } + if( !alreadyExists ) { + priorityList.add(vcSourceName); + eventsAtThisLoc.add(candidateEventToAdd); + } alleleCount++; } } + compCount++; } } @@ -137,14 +149,14 @@ public class GenotypingEngine { final Map> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes); // Sanity check the priority list for mistakes - sanityCheckPriorityList( priorityList, eventsAtThisLoc ); + validatePriorityList( priorityList, eventsAtThisLoc ); // Merge the event to find a common reference representation final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { - throw new ReviewedStingException("Something went wrong in the merging of alleles."); + throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles."); } final HashMap mergeMap = new HashMap(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele @@ -198,7 +210,7 @@ public class GenotypingEngine { return genotypes; } - private void sanityCheckPriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { + private void validatePriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { for( final VariantContext vc : eventsAtThisLoc ) { if( !priorityList.contains(vc.getSource()) ) { throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles."); @@ -453,8 +465,7 @@ public class GenotypingEngine { protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes ) { final Map> eventMapper = new HashMap>(eventsAtThisLoc.size()+1); - final Allele refAllele = eventsAtThisLoc.get(0).getReference(); - VariantContext refVC = eventsAtThisLoc.get(0); + VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis eventMapper.put(new Event(null), new ArrayList()); for( final VariantContext vc : eventsAtThisLoc ) { eventMapper.put(new Event(vc), new ArrayList()); @@ -464,11 +475,11 @@ public class GenotypingEngine { for( final Haplotype h : haplotypes ) { if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { final ArrayList alleles = new ArrayList(2); - alleles.add(refAllele); - alleles.add(h.getArtificialAllele()); + alleles.add(h.getArtificialRefAllele()); + alleles.add(h.getArtificialAltAllele()); final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") .alleles(alleles) - .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + refAllele.length() - 1).make() ); + .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + h.getArtificialRefAllele().length() - 1).make() ); if( eventMapper.containsKey(artificialVC) ) { eventMapper.get(artificialVC).add(h); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 0a98f54e9..87db5c9f7 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -379,8 +379,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) ); - if ( haplotype.isArtificialHaplotype() ) - h.setArtificialAllele(haplotype.getArtificialAllele(), haplotype.getArtificialAllelePosition()); + if ( haplotype.isArtificialHaplotype() ) { + h.setArtificialEvent(haplotype.getArtificialEvent()); + } h.leftBreakPoint = leftBreakPoint; h.rightBreakPoint = rightBreakPoint; if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 2476a666e..dfca05fcd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -46,8 +46,7 @@ public class Haplotype { private int alignmentStartHapwrtRef; public int leftBreakPoint = 0; public int rightBreakPoint = 0; - private Allele artificialAllele = null; - private int artificialAllelePosition = -1; + private Event artificialEvent = null; /** * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual @@ -70,10 +69,9 @@ public class Haplotype { this(bases, 0); } - protected Haplotype( final byte[] bases, final Allele artificialAllele, final int artificialAllelePosition ) { + protected Haplotype( final byte[] bases, final Event artificialEvent ) { this(bases, 0); - this.artificialAllele = artificialAllele; - this.artificialAllelePosition = artificialAllelePosition; + this.artificialEvent = artificialEvent; } public Haplotype( final byte[] bases, final GenomeLoc loc ) { @@ -152,20 +150,27 @@ public class Haplotype { } public boolean isArtificialHaplotype() { - return artificialAllele != null; + return artificialEvent != null; } - public Allele getArtificialAllele() { - return artificialAllele; + public Event getArtificialEvent() { + return artificialEvent; + } + + public Allele getArtificialRefAllele() { + return artificialEvent.ref; + } + + public Allele getArtificialAltAllele() { + return artificialEvent.alt; } public int getArtificialAllelePosition() { - return artificialAllelePosition; + return artificialEvent.pos; } - public void setArtificialAllele(final Allele artificialAllele, final int artificialAllelePosition) { - this.artificialAllele = artificialAllele; - this.artificialAllelePosition = artificialAllelePosition; + public void setArtificialEvent( final Event artificialEvent ) { + this.artificialEvent = artificialEvent; } @Requires({"refInsertLocation >= 0"}) @@ -179,7 +184,7 @@ public class Haplotype { newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant - return new Haplotype(newHaplotypeBases, altAllele, genomicInsertLocation); + return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation)); } public static class HaplotypeBaseComparator implements Comparator, Serializable { @@ -244,4 +249,16 @@ public class Haplotype { return haplotypeMap; } + + private static class Event { + public Allele ref; + public Allele alt; + public int pos; + + public Event( final Allele ref, final Allele alt, final int pos ) { + this.ref = ref; + this.alt = alt; + this.pos = pos; + } + } } From 676e79542aad40283da659a4bea4021a1785a170 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 9 Jan 2013 10:39:48 -0500 Subject: [PATCH 04/25] Bring CombineVariants back to public since it's used for SG. I needed to break ChromosomeCountConstants out of ChromosomeCounts to make this work. --- .../walkers/annotator/ChromosomeCounts.java | 12 +---- .../annotator/ChromosomeCountConstants.java | 44 +++++++++++++++++++ .../walkers/variantutils/CombineVariants.java | 4 +- 3 files changed, 48 insertions(+), 12 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java (99%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 5b1a1e236..517cb0df0 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -35,10 +35,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; -import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextUtils; @@ -52,12 +50,6 @@ import java.util.*; */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - public static final VCFInfoHeaderLine[] descriptions = { - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; - private Set founderIds = new HashSet(); public Map annotate(final RefMetaDataTracker tracker, @@ -78,8 +70,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn } public List getKeyNames() { - return Arrays.asList(keyNames); + return Arrays.asList(ChromosomeCountConstants.keyNames); } - public List getDescriptions() { return Arrays.asList(descriptions); } + public List getDescriptions() { return Arrays.asList(ChromosomeCountConstants.descriptions); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java new file mode 100755 index 000000000..26f3bb8d9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; + + +/** + * Keys and descriptions for the common chromosome count annotations + */ +public class ChromosomeCountConstants { + + public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + + public static final VCFInfoHeaderLine[] descriptions = { + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java similarity index 99% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 78b89d615..27b0e5201 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; +import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.vcf.*; @@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker implements Tree if ( SET_KEY != null ) headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants")); if ( !ASSUME_IDENTICAL_SAMPLES ) - headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); + headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); VCFHeader vcfHeader = new VCFHeader(headerLines, samples); vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER); vcfWriter.writeHeader(vcfHeader); From 396bce1f28ee622f3ebfd1237e975a3cd30e16ba Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 10:51:30 -0500 Subject: [PATCH 05/25] Reverting this change until we can figure out the right thing to do here. --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 8e90285d9..e416b489b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -124,8 +124,8 @@ public class LikelihoodCalculationEngine { final Haplotype haplotype = haplotypes.get(jjj); // TODO -- need to test against a reference/position with non-standard bases - if ( !Allele.acceptableAlleleBases(haplotype.getBases(), false) ) - continue; + //if ( !Allele.acceptableAlleleBases(haplotype.getBases(), false) ) + // continue; final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) ); previousHaplotypeSeen = haplotype; From 4fa439d89e4786f645c7c3d1b28ae48cfa02d0ea Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 9 Jan 2013 11:06:10 -0500 Subject: [PATCH 06/25] Move some classes back to public because they are used in the engine. Move some test classes to protected. We should have no more public->protected dependancies now --- .../diagnostics/targets/DiagnoseTargetsIntegrationTest.java | 0 .../gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java | 0 .../walkers/diagnostics/targets/SampleStatisticsUnitTest.java | 0 .../sting/utils/recalibration/ContextCovariateUnitTest.java | 0 .../sting/utils/recalibration/CycleCovariateUnitTest.java | 0 .../sting/utils/recalibration/QualQuantizerUnitTest.java | 0 .../sting/utils/recalibration/ReadCovariatesUnitTest.java | 0 .../sting/utils/recalibration/ReadGroupCovariateUnitTest.java | 0 .../sting/utils/recalibration/RecalDatumUnitTest.java | 0 .../sting/utils/recalibration/RecalUtilsUnitTest.java | 0 .../sting/utils/recalibration/RecalibrationReportUnitTest.java | 0 .../sting/utils/recalibration/RecalibrationTablesUnitTest.java | 0 .../sting/utils/recalibration/RecalibrationTestUtils.java | 0 .../broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java | 0 .../broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java | 0 .../sting/gatk/walkers/diffengine/DiffableReader.java | 0 .../broadinstitute/sting/gatk/walkers/diffengine/Difference.java | 0 22 files changed, 0 insertions(+), 0 deletions(-) rename {public => protected}/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java (100%) rename {public => protected}/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java (100%) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java From 3a0dd4b175e960522061eb7b3f367bfe73ef05e1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 9 Jan 2013 11:12:28 -0500 Subject: [PATCH 07/25] Oops, I broke the build. NOW we shouldn't have any more public->protected dependancies. --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index cffd405b1..dd92961cf 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; @@ -423,7 +424,7 @@ public class SelectVariants extends RodWalker implements TreeR headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); } - headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); + headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { From c1de92b5119b0c152172efe1be72647947382caa Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 9 Jan 2013 13:16:06 -0500 Subject: [PATCH 08/25] Add in some todo items --- .../gatk/walkers/variantutils/GenotypeConcordance.java | 10 ++++++++++ .../variantutils/ConcordanceMetricsUnitTest.java | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 7991e3432..ff69b9b11 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -39,6 +39,16 @@ public class GenotypeConcordance extends RodWalker evalSamples; List compSamples; + // todo -- integration test coverage + // todo -- deal with occurrences like: + // Eval: 20 4000 A C + // Eval: 20 4000 A AC + // Comp: 20 4000 A C + // currently this results in a warning and skipping + // todo -- extend to multiple eval, multiple comp + // todo -- table with "proportion of overlapping sites" (not just eval/comp margins) + + public ConcordanceMetrics reduceInit() { Map headerMap = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding,compBinding)); VCFHeader evalHeader = headerMap.get(evalBinding.getName()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 9a5b18735..50d71c532 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -34,6 +34,11 @@ import net.sf.picard.reference.ReferenceSequenceFile; public class ConcordanceMetricsUnitTest extends BaseTest { + // todo -- coverage for several sites (3,4) + // todo -- coverage for calculation of table margins + // todo -- coverage for site concordance + // todo -- coverage for disjoint and mostly-disjoint sample sets + private static ReferenceSequenceFile seq; private GenomeLocParser genomeLocParser; From 6787f86803d067436d442030b519112154153221 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 9 Jan 2013 13:23:24 -0500 Subject: [PATCH 09/25] Eliminate the import of DiploidGenotype, which switched public/private underneath me but for some reason didn't stop me from compiling... --- .../sting/gatk/walkers/variantutils/ConcordanceMetrics.java | 1 - .../sting/gatk/walkers/variantutils/GenotypeConcordance.java | 4 ---- 2 files changed, 5 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index ac6752277..f71bc4a01 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -4,7 +4,6 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index ff69b9b11..de7b14ddb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -1,20 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; -import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; -import org.broadinstitute.variant.vcf.VCFUtils; import java.io.PrintStream; import java.util.*; From 487fb2afb43693f6cb3c5fa52d5c98eff7451ddf Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 15:30:46 -0500 Subject: [PATCH 10/25] Bug fix for the case of overlapping assembled and partially-assembled events created by the HC. Unfortunately the symbolic allele can't be combined with the indel allele because the reference basis will change. --- .../sting/gatk/walkers/haplotypecaller/GenotypingEngine.java | 4 ++-- .../variant/variantcontext/VariantContextUtils.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 7c25ab551..42f5e1455 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -92,6 +92,7 @@ public class GenotypingEngine { cleanUpSymbolicUnassembledEvents( haplotypes ); if( !in_GGA_mode && samples.size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure mergeConsecutiveEventsBasedOnLD( haplotypes, samples, haplotypeReadMap, startPosKeySet, ref, refLoc ); + cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events } if( in_GGA_mode ) { for( final VariantContext compVC : activeAllelesToGenotype ) { @@ -261,7 +262,6 @@ public class GenotypingEngine { return returnMap; } - protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { final ArrayList haplotypesToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { @@ -269,7 +269,7 @@ public class GenotypingEngine { if( vc.isSymbolic() ) { for( final Haplotype h2 : haplotypes ) { for( final VariantContext vc2 : h2.getEventMap().values() ) { - if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) { + if( vc.getStart() == vc2.getStart() && (vc2.isIndel() || vc2.isMNP()) ) { // unfortunately symbolic alleles can't currently be combined with non-point events haplotypesToRemove.add(h); break; } diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java index 4d1e70340..2d74a1a4a 100755 --- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java @@ -710,7 +710,7 @@ public class VariantContextUtils { for ( VariantContext vc : VCs ) { // look at previous variant contexts of different type. If: - // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list + // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list) // c) neither: do nothing, just add vc to its own list boolean addtoOwnList = true; From 1a18947abfa37e8df106e6ffd485c765c004b28d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 15:54:02 -0500 Subject: [PATCH 11/25] Adding new command line argument requested on the forum to control the maximum number of haplotypes that are sent forward for genotyping. In the presence of a large degree of heterozygosity the current algorithm breaks down and so this argument would need to be increased. --- .../gatk/walkers/haplotypecaller/HaplotypeCaller.java | 7 ++++++- .../haplotypecaller/LikelihoodCalculationEngine.java | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index cde089b34..acc76815e 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -136,6 +136,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; + @Advanced + @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) + protected int maxNumHaplotypesInPopulation = 13; + @Advanced @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) protected int minKmer = 11; @@ -414,7 +418,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap ) : haplotypes ); + final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? + likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes ); for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine, bestHaplotypes, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e416b489b..6ca1ec1e8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -273,7 +273,7 @@ public class LikelihoodCalculationEngine { @Requires({"haplotypes.size() > 0"}) @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap ) { + public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { final int numHaplotypes = haplotypes.size(); final Set sampleKeySet = stratifiedReadMap.keySet(); @@ -287,7 +287,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( maxNumHaplotypesInPopulation, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { From 4a8466783a30f8761f2a6837aaf1d6abe0d9e676 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 7 Jan 2013 14:46:28 -0500 Subject: [PATCH 12/25] License Parser and update all license scripts * Implemented a script that parses and replaces the license (to be used by git on every commit) * Implemented a shell script that makes use of the license parser to add the license to all java files in the repo GSA-685 GSATDG-6 GSATDG-16 --- licensing/private_license.txt | 7 +++++++ licensing/protected_license.txt | 7 +++++++ licensing/public_license.txt | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 licensing/private_license.txt create mode 100644 licensing/protected_license.txt create mode 100644 licensing/public_license.txt diff --git a/licensing/private_license.txt b/licensing/private_license.txt new file mode 100644 index 000000000..e5609db65 --- /dev/null +++ b/licensing/private_license.txt @@ -0,0 +1,7 @@ +PRIVATE PRIVATE PRIVATE + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PRIVATE PRIVATE PRIVATE diff --git a/licensing/protected_license.txt b/licensing/protected_license.txt new file mode 100644 index 000000000..d3966e17d --- /dev/null +++ b/licensing/protected_license.txt @@ -0,0 +1,7 @@ +PROTECTED PROTECTED PROTECTED + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PROTECTED PROTECTED PROTECTED diff --git a/licensing/public_license.txt b/licensing/public_license.txt new file mode 100644 index 000000000..84e93af8e --- /dev/null +++ b/licensing/public_license.txt @@ -0,0 +1,7 @@ +PUBLIC PUBLIC PUBLIC + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PUBLIC PUBLIC PUBLIC From 3e52ce5fa8d6f8ec80c73b65959b6604f035bcef Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 10 Jan 2013 11:44:24 -0500 Subject: [PATCH 13/25] Remove DepthOfCoverage.java because it is no longer public - Move Pileup.java and PrintReads.java to their new homes --- public/packages/GATKEngine.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 27d2afa47..42b3a4d6e 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -53,11 +53,10 @@ - - - + + From f801cb3be51ee9b0e01c57cf9a55b2515e2a5d41 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 10 Jan 2013 13:46:49 -0500 Subject: [PATCH 14/25] Updating Queue maven version to 0.0.2 - After changes to the repositories, we are making sure that cmi-queueext is getting the right file. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 3aead62d6..2dfff0cc2 100644 --- a/build.xml +++ b/build.xml @@ -991,7 +991,7 @@ - +