diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 8585104d5..fee87b21f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -746,11 +746,9 @@ public class PhaseByTransmission extends RodWalker, HashMa if (tracker != null) { VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); + VariantContextBuilder builder = new VariantContextBuilder(vc); - GenotypesContext genotypeMap = vc.getGenotypes(); - - int mvCount; - + GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes()); for (Sample sample : trios) { Genotype mother = vc.getGenotype(sample.getMaternalID()); Genotype father = vc.getGenotype(sample.getPaternalID()); @@ -761,18 +759,18 @@ public class PhaseByTransmission extends RodWalker, HashMa continue; ArrayList trioGenotypes = new ArrayList(3); - mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes); + final int mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes); Genotype phasedMother = trioGenotypes.get(0); Genotype phasedFather = trioGenotypes.get(1); Genotype phasedChild = trioGenotypes.get(2); //Fill the genotype map with the new genotypes and increment metrics counters - genotypeMap.add(phasedChild); + genotypesContext.replace(phasedChild); if(mother != null){ - genotypeMap.add(phasedMother); + genotypesContext.replace(phasedMother); if(father != null){ - genotypeMap.add(phasedFather); + genotypesContext.replace(phasedFather); updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters); mvfLine = 
String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString()); if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) @@ -786,7 +784,7 @@ public class PhaseByTransmission extends RodWalker, HashMa } } else{ - genotypeMap.add(phasedFather); + genotypesContext.replace(phasedFather); updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters); if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); @@ -797,10 +795,10 @@ public class PhaseByTransmission extends RodWalker, HashMa //TODO: ADAPT FOR PAIRS TOO!! 
if(mvCount>0 && mvFile != null) mvFile.println(mvfLine); - } - vcfWriter.add(new VariantContextBuilder(vc).genotypes(genotypeMap).make()); + builder.genotypes(genotypesContext); + vcfWriter.add(builder.make()); } return metricsCounters; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index abd81fe61..ee3184dc2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -10,10 +10,7 @@ import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; import java.util.*; @@ -255,11 +252,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, */ private VariantContext parseVCFLine(String[] parts) { VariantContextBuilder builder = new VariantContextBuilder(); + builder.source(getName()); + // increment the line count lineNo++; // parse out the required fields - builder.chr(getCachedString(parts[0])); + final String chr = getCachedString(parts[0]); + builder.chr(chr); int pos = Integer.valueOf(parts[1]); builder.start(pos); @@ -294,9 +294,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // do we have genotyping data if (parts.length > NUM_STANDARD_FIELDS) { - builder.attribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8])); 
- builder.attribute(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this); - builder.genotypesAreUnparsed(); + LazyGenotypesContext lazy = new LazyGenotypesContext(this, parts[8], chr, pos, alleles, header.getGenotypeSamples().size()); + builder.genotypesNoValidation(lazy); } VariantContext vc = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 92c8840fb..7a496cb7c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -219,9 +219,6 @@ public class StandardVCFWriter extends IndexingVCFWriter { Map infoFields = new TreeMap(); for ( Map.Entry field : vc.getAttributes().entrySet() ) { String key = field.getKey(); - if ( key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) ) - continue; - String outputValue = formatVCFField(field.getValue()); if ( outputValue != null ) infoFields.put(key, outputValue); @@ -229,9 +226,10 @@ public class StandardVCFWriter extends IndexingVCFWriter { writeInfoString(infoFields); // FORMAT - if ( vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) ) { + final GenotypesContext gc = vc.getGenotypes(); + if ( gc instanceof LazyGenotypesContext && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() != null) { mWriter.write(VCFConstants.FIELD_SEPARATOR); - mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, "")); + mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData()); } else { List genotypeAttributeKeys = new ArrayList(); if ( vc.hasGenotypes() ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index 7d71a9c5a..5e6e2e94a 100755 --- 
a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -124,7 +124,7 @@ public class VCF3Codec extends AbstractVCFCodec { int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR); - GenotypesContext genotypes = GenotypesContext.create(nParts); + ArrayList genotypes = new ArrayList(nParts); // get the format keys int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); @@ -191,7 +191,7 @@ public class VCF3Codec extends AbstractVCFCodec { } } - return genotypes; + return GenotypesContext.create(genotypes, header.sampleNameToOffset, header.sampleNamesInOrder); } @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java index e754c215d..b4ad81c02 100644 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java +++ b/public/java/src/org/broadinstitute/sting/utils/gcf/GCF.java @@ -191,8 +191,6 @@ public class GCF { boolean first = true; for ( Map.Entry field : vc.getAttributes().entrySet() ) { String key = field.getKey(); - if ( key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) ) - continue; int stringIndex = GCFHeaderBuilder.encodeString(key); String outputValue = StandardVCFWriter.formatVCFField(field.getValue()); if ( outputValue != null ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 47e6b2fbe..846e6c89c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -55,8 +55,14 @@ public class GenotypesContext implements List { /** if true, then we need to 
reinitialize sampleNamesInOrder and sampleNameToOffset before we use them /*/ boolean cacheIsInvalid = true; - /** An ArrayList of genotypes contained in this context */ - List genotypes; + /** + * An ArrayList of genotypes contained in this context + * + * WARNING: TO ENABLE THE LAZY VERSION OF THIS CLASS, NO METHODS SHOULD DIRECTLY + * ACCESS THIS VARIABLE. USE getGenotypes() INSTEAD. + * + */ + ArrayList notToBeDirectlyAccessedGenotypes; /** Are we allowing users to modify the list? */ boolean immutable = false; @@ -70,7 +76,7 @@ public class GenotypesContext implements List { /** * Create an empty GenotypeContext */ - private GenotypesContext() { + protected GenotypesContext() { this(10, false); } @@ -78,7 +84,7 @@ public class GenotypesContext implements List { * Create an empty GenotypeContext, with initial capacity for n elements */ @Requires("n >= 0") - private GenotypesContext(final int n, final boolean immutable) { + protected GenotypesContext(final int n, final boolean immutable) { this(new ArrayList(n), immutable); } @@ -86,8 +92,8 @@ public class GenotypesContext implements List { * Create an GenotypeContext containing genotypes */ @Requires("genotypes != null") - private GenotypesContext(final ArrayList genotypes, final boolean immutable) { - this.genotypes = genotypes; + protected GenotypesContext(final ArrayList genotypes, final boolean immutable) { + this.notToBeDirectlyAccessedGenotypes = genotypes; this.immutable = immutable; this.sampleNameToOffset = null; this.cacheIsInvalid = true; @@ -110,11 +116,11 @@ public class GenotypesContext implements List { "sampleNamesInOrder != null", "genotypes.size() == sampleNameToOffset.size()", "genotypes.size() == sampleNamesInOrder.size()"}) - private GenotypesContext(final ArrayList genotypes, + protected GenotypesContext(final ArrayList genotypes, final Map sampleNameToOffset, final List sampleNamesInOrder, final boolean immutable) { - this.genotypes = genotypes; + this.notToBeDirectlyAccessedGenotypes = 
genotypes; this.immutable = immutable; this.sampleNameToOffset = sampleNameToOffset; this.sampleNamesInOrder = sampleNamesInOrder; @@ -203,7 +209,7 @@ public class GenotypesContext implements List { @Requires({"toCopy != null"}) @Ensures({"result != null"}) public static final GenotypesContext copy(final GenotypesContext toCopy) { - return create(new ArrayList(toCopy.genotypes)); + return create(new ArrayList(toCopy.getGenotypes())); } /** @@ -225,7 +231,6 @@ public class GenotypesContext implements List { // --------------------------------------------------------------------------- public final GenotypesContext immutable() { - this.genotypes = Collections.unmodifiableList(genotypes); immutable = true; return this; } @@ -255,16 +260,16 @@ public class GenotypesContext implements List { @Ensures({"cacheIsInvalid == false", "sampleNamesInOrder != null", "sampleNameToOffset != null", - "sameSamples(genotypes, sampleNamesInOrder)", - "sameSamples(genotypes, sampleNameToOffset.keySet())"}) - private synchronized void buildCache() { + "sameSamples(notToBeDirectlyAccessedGenotypes, sampleNamesInOrder)", + "sameSamples(notToBeDirectlyAccessedGenotypes, sampleNameToOffset.keySet())"}) + protected synchronized void buildCache() { if ( cacheIsInvalid ) { cacheIsInvalid = false; - sampleNamesInOrder = new ArrayList(genotypes.size()); - sampleNameToOffset = new HashMap(genotypes.size()); + sampleNamesInOrder = new ArrayList(size()); + sampleNameToOffset = new HashMap(size()); - for ( int i = 0; i < genotypes.size(); i++ ) { - final Genotype g = genotypes.get(i); + for ( int i = 0; i < size(); i++ ) { + final Genotype g = getGenotypes().get(i); sampleNamesInOrder.add(g.getSampleName()); sampleNameToOffset.put(g.getSampleName(), i); } @@ -279,20 +284,24 @@ public class GenotypesContext implements List { // // --------------------------------------------------------------------------- + protected ArrayList getGenotypes() { + return notToBeDirectlyAccessedGenotypes; + } + 
@Override public void clear() { checkImmutability(); - genotypes.clear(); + getGenotypes().clear(); } @Override public int size() { - return genotypes.size(); + return getGenotypes().size(); } @Override public boolean isEmpty() { - return genotypes.isEmpty(); + return getGenotypes().isEmpty(); } @Override @@ -300,14 +309,14 @@ public class GenotypesContext implements List { public boolean add(final Genotype genotype) { checkImmutability(); invalidateCaches(); - return genotypes.add(genotype); + return getGenotypes().add(genotype); } @Requires("genotype != null") public boolean add(final Genotype ... genotype) { checkImmutability(); invalidateCaches(); - return genotypes.addAll(Arrays.asList(genotype)); + return getGenotypes().addAll(Arrays.asList(genotype)); } @Override @@ -319,7 +328,7 @@ public class GenotypesContext implements List { public boolean addAll(final Collection genotypes) { checkImmutability(); invalidateCaches(); - return this.genotypes.addAll(genotypes); + return getGenotypes().addAll(genotypes); } @Override @@ -329,38 +338,43 @@ public class GenotypesContext implements List { @Override public boolean contains(final Object o) { - return this.genotypes.contains(o); + return getGenotypes().contains(o); } @Override public boolean containsAll(final Collection objects) { - return this.genotypes.containsAll(objects); + return getGenotypes().containsAll(objects); } @Override public Genotype get(final int i) { - return genotypes.get(i); + return getGenotypes().get(i); } public Genotype get(final String sampleName) { buildCache(); - Integer offset = sampleNameToOffset.get(sampleName); - return offset == null ? null : genotypes.get(offset); + Integer offset = getSampleI(sampleName); + return offset == null ? 
null : getGenotypes().get(offset); + } + + private Integer getSampleI(final String sampleName) { + buildCache(); + return sampleNameToOffset.get(sampleName); } @Override public int indexOf(final Object o) { - return genotypes.indexOf(o); + return getGenotypes().indexOf(o); } @Override public Iterator iterator() { - return genotypes.iterator(); + return getGenotypes().iterator(); } @Override public int lastIndexOf(final Object o) { - return genotypes.lastIndexOf(o); + return getGenotypes().lastIndexOf(o); } @Override @@ -381,50 +395,67 @@ public class GenotypesContext implements List { public Genotype remove(final int i) { checkImmutability(); invalidateCaches(); - return genotypes.remove(i); + return getGenotypes().remove(i); } @Override public boolean remove(final Object o) { checkImmutability(); invalidateCaches(); - return genotypes.remove(o); + return getGenotypes().remove(o); } @Override public boolean removeAll(final Collection objects) { checkImmutability(); invalidateCaches(); - return genotypes.removeAll(objects); + return getGenotypes().removeAll(objects); } @Override public boolean retainAll(final Collection objects) { checkImmutability(); invalidateCaches(); - return genotypes.retainAll(objects); + return getGenotypes().retainAll(objects); } @Override public Genotype set(final int i, final Genotype genotype) { checkImmutability(); invalidateCaches(); - return genotypes.set(i, genotype); + return getGenotypes().set(i, genotype); + } + + /** + * Replaces the genotype in this context -- note for efficiency + * reasons we do not add the genotype if it's not present. The + * return value will be null indicating this happened. 
+ * @param genotype a non null genotype to bind in this context + * @return null if genotype was not added, otherwise returns the previous genotype + */ + @Requires("genotype != null") + public Genotype replace(final Genotype genotype) { + checkImmutability(); + Integer offset = getSampleI(genotype.getSampleName()); + if ( offset == null ) + return null; + else + return getGenotypes().set(offset, genotype); } @Override public List subList(final int i, final int i1) { - return genotypes.subList(i, i1); + return getGenotypes().subList(i, i1); } @Override public Object[] toArray() { - return genotypes.toArray(); + return getGenotypes().toArray(); } @Override public T[] toArray(final T[] ts) { - return genotypes.toArray(ts); + return getGenotypes().toArray(ts); } /** @@ -528,13 +559,13 @@ public class GenotypesContext implements List { @Requires("samples != null") @Ensures("result != null") public GenotypesContext subsetToSamples( final Set samples ) { - if ( samples.size() == genotypes.size() ) + if ( samples.size() == size() ) return this; else if ( samples.isEmpty() ) return NO_GENOTYPES; else { GenotypesContext subset = create(samples.size()); - for ( final Genotype g : genotypes ) { + for ( final Genotype g : getGenotypes() ) { if ( samples.contains(g.getSampleName()) ) { subset.add(g); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java new file mode 100644 index 000000000..ca2d7a812 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, 
modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.variantcontext;
+
+import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A GenotypesContext whose genotypes are decoded on demand from unparsed genotype text.
+ *
+ * The parser, the unparsed text, and the contig / start / alleles that are handed to
+ * the parser are captured at construction time. The first call to getGenotypes()
+ * asks the parser to decode the text, adopts the resulting genotype list and
+ * sample-name caches, and drops the reference to the unparsed text. Until that
+ * happens, size() and isEmpty() answer from the genotype count supplied to the
+ * constructor, so they never trigger decoding.
+ *
+ * NOTE(review): loading is not synchronized (only buildCache() is) -- confirm that
+ * instances are not shared across threads before the first access.
+ */
+public class LazyGenotypesContext extends GenotypesContext {
+    /** Parser asked to decode the unparsed genotype data on first access */
+    final VCFParser parser;
+
+    /** The still-unparsed genotype text; set to null once the genotypes have been loaded */
+    String unparsedGenotypeData;
+
+    /** Alleles passed through to the parser when decoding */
+    final List alleles;
+
+    /** Contig passed through to the parser when decoding */
+    final String contig;
+
+    /** Start position passed through to the parser when decoding */
+    final int start;
+
+    /** Number of genotypes the creator of this context says the unparsed data holds */
+    final int nUnparsedGenotypes;
+
+    /** True once the unparsed data has been decoded by getGenotypes() */
+    boolean loaded = false;
+
+    /** Placeholder handed to the superclass constructor; replaced by the decoded list on load */
+    private final static ArrayList EMPTY = new ArrayList(0);
+
+    /**
+     * Creates a context that will decode unparsedGenotypeData with parser the first
+     * time its genotypes are actually needed.
+     *
+     * @param parser               the parser whose createGenotypeMap() decodes the data
+     * @param unparsedGenotypeData the raw genotype text to decode later
+     * @param contig               contig handed to the parser
+     * @param start                start position handed to the parser
+     * @param alleles              alleles handed to the parser
+     * @param nUnparsedGenotypes   the value size() reports (and isEmpty() tests against 0)
+     *                             before the data has been decoded
+     */
+    public LazyGenotypesContext(final VCFParser parser, final String unparsedGenotypeData,
+                                final String contig, final int start, final List alleles,
+                                int nUnparsedGenotypes ) {
+        super(EMPTY, false);
+        this.unparsedGenotypeData = unparsedGenotypeData;
+        this.start = start;
+        this.parser = parser;
+        this.contig = contig;
+        this.alleles = alleles;
+        this.nUnparsedGenotypes = nUnparsedGenotypes;
+    }
+
+    /**
+     * Returns the genotype list, decoding the unparsed data on the first call.
+     *
+     * @return the decoded list of genotypes backing this context
+     */
+    @Override
+    protected ArrayList getGenotypes() {
+        if ( ! loaded ) {
+            final GenotypesContext subcontext = parser.createGenotypeMap(unparsedGenotypeData, alleles, contig, start);
+
+            // use the accessor rather than the raw field, as GenotypesContext asks of all
+            // callers, so we adopt the real list even if the parser's context is itself lazy
+            notToBeDirectlyAccessedGenotypes = subcontext.getGenotypes();
+            sampleNamesInOrder = subcontext.sampleNamesInOrder;
+            sampleNameToOffset = subcontext.sampleNameToOffset;
+
+            // only treat the adopted caches as valid if the parser's context does; when it
+            // never built them (they may be null), buildCache() below constructs them on demand
+            cacheIsInvalid = subcontext.cacheIsInvalid;
+            loaded = true;
+            unparsedGenotypeData = null;
+
+            // warning -- this path allows us to create a VariantContext that doesn't run validateGenotypes()
+            // That said, it's not such an important routine -- it's just checking that the genotypes
+            // are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
+        }
+
+        return notToBeDirectlyAccessedGenotypes;
+    }
+
+    /**
+     * Makes sure the sample-name caches are usable: loads the genotypes if that has
+     * not happened yet, then rebuilds the caches if they are still marked invalid
+     * (for example because the context was modified after loading).
+     */
+    @Override
+    protected synchronized void buildCache() {
+        if ( cacheIsInvalid ) {
+            getGenotypes(); // will load up all of the necessary data
+            // no-op when loading has just supplied valid caches; otherwise rebuilds them
+            // from the current genotype list
+            super.buildCache();
+        }
+    }
+
+    @Override
+    public boolean isEmpty() {
+        // optimization -- we know the number of samples in the unparsed data, so use it here to
+        // avoid parsing just to know if the genotypes context is empty
+        return loaded ? super.isEmpty() : nUnparsedGenotypes == 0;
+    }
+
+    @Override
+    public int size() {
+        // optimization -- we know the number of samples in the unparsed data, so use it here to
+        // avoid parsing just to know the size of the context
+        return loaded ? super.size() : nUnparsedGenotypes;
+    }
+
+    /**
+     * @return the unparsed genotype text, or null once the genotypes have been loaded
+     */
+    public String getUnparsedGenotypeData() {
+        return unparsedGenotypeData;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 34131b9c4..8d74f5220 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -165,8 +165,6 @@ import java.util.*; public class VariantContext implements Feature { // to enable tribble intergration protected CommonInfo commonInfo = null; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_"; - public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_"; @Deprecated // ID is no longer stored in the attributes map private final static String ID_KEY = "ID"; @@ -231,7 +229,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param other the VariantContext to copy */ protected VariantContext(VariantContext other) { - this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.filtersWereApplied() ? 
other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, NO_VALIDATION); + this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), + other.getAlleles(), other.getGenotypes(), other.getLog10PError(), + other.getFiltersMaybeNull(), + other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, + NO_VALIDATION); } /** @@ -247,14 +249,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes * @param referenceBaseForIndel padded reference base - * @param genotypesAreUnparsed true if the genotypes have not yet been parsed * @param validationToPerform set of validation steps to take */ protected VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double log10PError, Set filters, Map attributes, - Byte referenceBaseForIndel, boolean genotypesAreUnparsed, + Byte referenceBaseForIndel, EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } this.contig = contig; @@ -265,17 +266,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati if ( ID == null || ID.equals("") ) throw new IllegalArgumentException("ID field cannot be the null or the empty string"); this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? 
VCFConstants.EMPTY_ID_FIELD : ID; - if ( !genotypesAreUnparsed && attributes != null ) { - if ( attributes.containsKey(UNPARSED_GENOTYPE_MAP_KEY) ) { - attributes = new HashMap(attributes); - attributes.remove(UNPARSED_GENOTYPE_MAP_KEY); - } - if ( attributes.containsKey(UNPARSED_GENOTYPE_PARSER_KEY) ) { - attributes = new HashMap(attributes); - attributes.remove(UNPARSED_GENOTYPE_PARSER_KEY); - } - } - this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; @@ -316,13 +306,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati // --------------------------------------------------------------------------------------------------------- public VariantContext subContextFromSamples(Set sampleNames, Collection alleles) { - loadGenotypes(); VariantContextBuilder builder = new VariantContextBuilder(this); return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make(); } public VariantContext subContextFromSamples(Set sampleNames) { - loadGenotypes(); VariantContextBuilder builder = new VariantContextBuilder(this); GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames); return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make(); @@ -698,35 +686,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- - private void loadGenotypes() { - if ( !hasAttribute(UNPARSED_GENOTYPE_MAP_KEY) ) { - if ( genotypes == null ) - genotypes = NO_GENOTYPES; - return; - } - - Object parserObj = getAttribute(UNPARSED_GENOTYPE_PARSER_KEY); - if ( parserObj == null || !(parserObj instanceof VCFParser) ) - throw new IllegalStateException("There is no VCF parser stored to unparse the genotype data"); - VCFParser parser = (VCFParser)parserObj; - - Object mapObj = 
getAttribute(UNPARSED_GENOTYPE_MAP_KEY); - if ( mapObj == null ) - throw new IllegalStateException("There is no mapping string stored to unparse the genotype data"); - - genotypes = parser.createGenotypeMap(mapObj.toString(), new ArrayList(alleles), getChr(), getStart()); - - commonInfo.removeAttribute(UNPARSED_GENOTYPE_MAP_KEY); - commonInfo.removeAttribute(UNPARSED_GENOTYPE_PARSER_KEY); - - validateGenotypes(); - } - /** * @return the number of samples in the context */ public int getNSamples() { - loadGenotypes(); return genotypes.size(); } @@ -734,12 +697,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return true if the context has associated genotypes */ public boolean hasGenotypes() { - loadGenotypes(); return ! genotypes.isEmpty(); } public boolean hasGenotypes(Collection sampleNames) { - loadGenotypes(); return genotypes.containsSamples(sampleNames); } @@ -747,17 +708,14 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return set of all Genotypes associated with this context */ public GenotypesContext getGenotypes() { - loadGenotypes(); return genotypes; } public Iterable getGenotypesOrderedByName() { - loadGenotypes(); return genotypes.iterateInSampleNameOrder(); } public Iterable getGenotypesOrderedBy(Iterable sampleOrdering) { - loadGenotypes(); return genotypes.iterateInSampleNameOrder(sampleOrdering); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index 379a01bb4..b79584df8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -72,7 +72,6 @@ public class VariantContextBuilder { private Map attributes = null; private boolean attributesCanBeModified = false; private Byte 
referenceBaseForIndel = null; - private boolean genotypesAreUnparsed = false; /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); @@ -112,7 +111,6 @@ public class VariantContextBuilder { this.contig = parent.contig; this.filters = parent.getFiltersMaybeNull(); this.genotypes = parent.genotypes; - this.genotypesAreUnparsed = parent.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY); this.ID = parent.getID(); this.log10PError = parent.getLog10PError(); this.referenceBaseForIndel = parent.getReferenceBaseForIndel(); @@ -179,7 +177,7 @@ public class VariantContextBuilder { /** * Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable - * collection, so methods that want to add / remove records require the attributes to be copied first + * collection, so methods that want to add / remove records require the attributes to be copied to a */ private void makeAttributesModifiable() { if ( ! attributesCanBeModified ) { @@ -243,6 +241,11 @@ public class VariantContextBuilder { return this; } + public VariantContextBuilder genotypesNoValidation(final GenotypesContext genotypes) { + this.genotypes = genotypes; + return this; + } + /** * Tells this builder that the resulting VariantContext should use a GenotypeContext containing genotypes * @@ -270,15 +273,6 @@ public class VariantContextBuilder { return this; } - /** - * ADVANCED! 
tells us that the genotypes data is stored as an unparsed attribute - * @return - */ - public VariantContextBuilder genotypesAreUnparsed() { - this.genotypesAreUnparsed = true; - return this; - } - /** * Tells us that the resulting VariantContext should have ID * @param ID @@ -395,6 +389,6 @@ public class VariantContextBuilder { public VariantContext make() { return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, log10PError, filters, attributes, - referenceBaseForIndel, genotypesAreUnparsed, toValidate); + referenceBaseForIndel, toValidate); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 1cb43ceb1..c2348b4a3 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -14,9 +14,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testNoAction() { + // note that this input if slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); + Arrays.asList("b7b7c218e219cd923ce5b6eefc5b7171")); executeTest("test no action", spec); } @@ -24,59 +27,86 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("27b13f179bb4920615dff3a32730d845")); + 
Arrays.asList("6d45a19e4066e7de6ff6a61f43ffad2b")); executeTest("test clustered SNPs", spec); } @Test - public void testMasks() { + public void testMask1() { + // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("578f9e774784c25871678e6464fd212b")); + Arrays.asList("65b5006bf3ee9d9d08a36d6b854773f2")); executeTest("test mask all", spec1); + } + @Test + public void testMask2() { + // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); + Arrays.asList("a275d36baca81a1ce03dbb528e95a069")); executeTest("test mask some", spec2); + } + @Test + public void testMask3() { + // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec3 = new WalkerTestSpec( baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5939f80d14b32d88587373532d7b90e5")); + Arrays.asList("c9489e1c1342817c36ab4f0770609bdb")); executeTest("test mask extend", spec3); } @Test public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( 
+ // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); + Arrays.asList("327a611bf82c6c4ae77fbb6d06359f9d")); executeTest("test filter #1", spec); } @Test public void testFilter2() { + // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); + Arrays.asList("7612b3460575402ad78fa4173178bdcc")); executeTest("test filter #2", spec); } @Test public void testFilterWithSeparateNames() { + // note that this input is slightly malformed, but with the new properly + // only when really needed genotype loading of VCF files we don't actually + // fix the file in the output WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); + Arrays.asList("dce33441f58b284ac9ab94f8e64b84e3")); executeTest("test filter with separate names #2", spec); } @Test - public void testGenotypeFilters() { + public void testGenotypeFilters1() { WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 
1:10,020,000-10,021,000", 1, Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); + } + @Test + public void testGenotypeFilters2() { WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));