From e56d52006a9a31079960bd55dc9249b10892c93b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 16 Nov 2011 10:39:17 -0500 Subject: [PATCH] Continuing bugfixes to get new VC working --- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../walkers/variantutils/SelectVariants.java | 4 + .../variantcontext/GenotypeCollection.java | 78 +++++++++++++++---- .../utils/variantcontext/VariantContext.java | 13 +++- ...gatingAlternateAllelesIntegrationTest.java | 51 ------------ 5 files changed, 77 insertions(+), 73 deletions(-) delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 549c26575..297203aec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -333,11 +333,11 @@ public class BeagleOutputToVCFWalker extends RodWalker { VariantContext filteredVC; if ( beagleVarCounts > 0 || DONT_FILTER_MONOMORPHIC_SITES ) - filteredVC = new VariantContext("outputvcf", VCFConstants.EMPTY_ID_FIELD, vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes()); + filteredVC = new VariantContext("outputvcf", vc_input.getID(), vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes()); else { Set removedFilters = vc_input.filtersWereApplied() ? new HashSet(vc_input.getFilters()) : new HashSet(1); removedFilters.add(String.format("BGL_RM_WAS_%s",vc_input.getAlternateAllele(0))); - filteredVC = new VariantContext("outputvcf", VCFConstants.EMPTY_ID_FIELD, vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), new HashSet(Arrays.asList(vc_input.getReference())), genotypes, vc_input.getNegLog10PError(), removedFilters, vc_input.getAttributes()); + filteredVC = new VariantContext("outputvcf", vc_input.getID(), vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), new HashSet(Arrays.asList(vc_input.getReference())), genotypes, vc_input.getNegLog10PError(), removedFilters, vc_input.getAttributes()); } HashMap attributes = new HashMap(filteredVC.getAttributes()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 3c92bf00f..6fec0fac2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -654,7 +654,10 @@ public class SelectVariants extends RodWalker { if ( samples == null || samples.isEmpty() ) return vc; +// logger.info("Genotypes in full vc: " + vc.getGenotypes()); +// logger.info("My own sub : " + vc.getGenotypes().subsetToSamples(samples)); VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); +// logger.info("Genotypes in sub vc: " + sub.getGenotypes()); // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) @@ -691,6 +694,7 @@ public class SelectVariants extends RodWalker { sub = VariantContext.modifyAttributes(sub, attributes); +// logger.info("Genotypes in final vc: " + sub.getGenotypes()); return sub; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java index 6ccb2a9ff..4dbc23e63 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeCollection.java @@ -30,11 +30,13 @@ import java.util.*; * */ public class GenotypeCollection implements List { - public final static GenotypeCollection NO_GENOTYPES = new GenotypeCollection(); + public final static GenotypeCollection NO_GENOTYPES = + new GenotypeCollection(new ArrayList(0), new HashMap(0), new HashSet(0), true); + Set sampleNamesInOrder = null; Map sampleNameToOffset = null; boolean cacheIsInvalid = true; - final ArrayList genotypes; + List genotypes; boolean immutable = false; // --------------------------------------------------------------------------- @@ -54,6 +56,19 @@ public class GenotypeCollection implements List { private GenotypeCollection(final ArrayList genotypes, final boolean immutable) { this.genotypes = genotypes; this.immutable = immutable; + this.sampleNameToOffset = null; + this.cacheIsInvalid = true; + } + + private GenotypeCollection(final ArrayList genotypes, + final Map sampleNameToOffset, + final Set sampleNamesInOrder, + final boolean immutable) { + this.genotypes = genotypes; + this.immutable = immutable; + this.sampleNameToOffset = sampleNameToOffset; + this.sampleNamesInOrder = sampleNamesInOrder; + this.cacheIsInvalid = false; } // --------------------------------------------------------------------------- @@ -108,12 +123,8 @@ public class GenotypeCollection implements List { // // --------------------------------------------------------------------------- - public final GenotypeCollection mutable() { - immutable = false; - return this; - } - public final GenotypeCollection immutable() { + this.genotypes = Collections.unmodifiableList(genotypes); immutable = true; return this; } @@ -135,17 +146,20 @@ public class GenotypeCollection implements List { private void invalidateCaches() { cacheIsInvalid = true; - if ( sampleNameToOffset != null ) sampleNameToOffset.clear(); + sampleNamesInOrder = null; + sampleNameToOffset = null; } private void buildCache() { cacheIsInvalid = false; + sampleNamesInOrder = new TreeSet(); + sampleNameToOffset = new HashMap(genotypes.size()); - if ( sampleNameToOffset == null ) - sampleNameToOffset = new HashMap(genotypes.size()); - - for ( int i = 0; i < genotypes.size(); i++ ) - sampleNameToOffset.put(genotypes.get(i).getSampleName(), i); + for ( int i = 0; i < genotypes.size(); i++ ) { + final Genotype g = genotypes.get(i); + sampleNamesInOrder.add(g.getSampleName()); + sampleNameToOffset.put(g.getSampleName(), i); + } } @@ -341,7 +355,8 @@ public class GenotypeCollection implements List { } public Set getSampleNamesOrderedByName() { - return new TreeSet(getSampleNames()); + buildCache(); + return sampleNamesInOrder; } public boolean containsSample(final String sample) { @@ -365,10 +380,41 @@ public class GenotypeCollection implements List { return NO_GENOTYPES; else { GenotypeCollection subset = create(samples.size()); - for ( final Genotype g : genotypes ) - if ( samples.contains(g.getSampleName()) ) + for ( final Genotype g : genotypes ) { + if ( samples.contains(g.getSampleName()) ) { subset.add(g); + } + } return subset; } } + + @Override + public String toString() { + final List gS = new ArrayList(); + for ( final Genotype g : this.iterateInSampleNameOrder() ) + gS.add(g.toString()); + return "[" + join(",", gS) + "]"; + } + + // copied from Utils + private static String join(final String separator, final Collection objects) { + if (objects.isEmpty()) { // fast path for empty collection + return ""; + } else { + final Iterator iter = objects.iterator(); + final T first = iter.next(); + + if ( ! iter.hasNext() ) // fast path for singleton collections + return first.toString(); + else { // full path for 2+ collection that actually need a join + final StringBuilder ret = new StringBuilder(first.toString()); + while(iter.hasNext()) { + ret.append(separator); + ret.append(iter.next().toString()); + } + return ret.toString(); + } + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 77dc88cc5..d0f88e2ec 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -411,7 +411,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles this.alleles = makeAlleles(alleles); - if ( genotypes == null ) { + if ( genotypes == null || genotypes == NO_GENOTYPES ) { this.genotypes = NO_GENOTYPES; } else { this.genotypes = genotypes.immutable(); @@ -543,8 +543,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati // } public VariantContext subContextFromSamples(Set sampleNames, Collection alleles) { + loadGenotypes(); + GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames); return new VariantContext(getSource(), getID(), contig, start, stop, alleles, - genotypes.subsetToSamples(sampleNames), + newGenotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), @@ -552,6 +554,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati } public VariantContext subContextFromSamples(Set sampleNames) { + loadGenotypes(); GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames); return new VariantContext(getSource(), getID(), contig, start, stop, allelesOfGenotypes(newGenotypes), newGenotypes, @@ -562,7 +565,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati } public VariantContext subContextFromSample(String sampleName) { - return subContextFromSamples(new HashSet(Arrays.asList(sampleName))); + return subContextFromSamples(Collections.singleton(sampleName)); } /** @@ -1460,7 +1463,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati public String toString() { return String.format("[VC %s @ %s of type=%s alleles=%s attr=%s GT=%s", getSource(), contig + ":" + (start - stop == 0 ? start : start + "-" + stop), this.getType(), - ParsingUtils.sortList(this.getAlleles()), ParsingUtils.sortedString(this.getAttributes()), this.getGenotypesSortedByName()); + ParsingUtils.sortList(this.getAlleles()), + ParsingUtils.sortedString(this.getAttributes()), + this.getGenotypes()); } // protected basic manipulation routines diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java deleted file mode 100644 index db1e4a82f..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest { - - public static String baseTestString(String reference, String VCF, int maxDist) { - return "-T MergeSegregatingAlternateAlleles" + - " -R " + reference + - " --variant:vcf " + validationDataLocation + VCF + - " --maxGenomicDistance " + maxDist + - " -o %s" + - " -NO_HEADER"; - } - - - @Test - public void test1() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1) - + " -L chr20:556259-756570", - 1, - Arrays.asList("af5e1370822551c0c6f50f23447dc627")); - executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec); - } - - @Test - public void test2() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10) - + " -L chr20:556259-756570", - 1, - Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb")); - executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec); - } - - @Test - public void test3() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100) - + " -L chr20:556259-756570", - 1, - Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5")); - executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec); - } - - -} \ No newline at end of file