diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 081f86ab9..7953edd7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -255,7 +255,7 @@ public class VariantContextAdaptors { genotypes.add(call); alleles.add(refAllele); GenomeLoc loc = ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart()); - return new VariantContext(name, VCFConstants.EMPTY_ID_FIELD, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, geli.getLODBestToReference(), null, attributes); + return new VariantContextBuilder(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles).genotypes(genotypes).negLog10PError(geli.getLODBestToReference()).attributes(attributes).make(); } else return null; // can't handle anything else } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 9f0353eb9..b782de15f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.util.*; @@ -179,10 +180,10 @@ public class VariantAnnotatorEngine { } // generate a new annotated VC - final VariantContext annotatedVC = VariantContext.modifyAttributes(vc, infoAnnotations); + VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations); // annotate genotypes, creating another new VC in the process - return VariantContext.modifyGenotypes(annotatedVC, annotateGenotypes(tracker, ref, stratifiedContexts, vc)); + return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make(); } private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { @@ -192,7 +193,7 @@ public class VariantAnnotatorEngine { infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null); // annotate dbsnp id if available and not already there if ( rsID != null && vc.emptyID() ) - vc = VariantContext.modifyID(vc, rsID); + vc = new VariantContextBuilder(vc).id(rsID).make(); } else { boolean overlapsComp = false; for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 649b7621b..d4aa21097 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -358,7 +358,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes)); + vcfWriter.add(new VariantContextBuilder(filteredVC).attributes(attributes).make()); return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 1d6eb4b64..aa71f4399 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -201,7 +201,7 @@ public class ProduceBeagleInputWalker extends RodWalker { logger.debug(String.format("boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize+testSetSize+1)); if ( (bootstrapSetSize+1.0)/(1.0+bootstrapSetSize+testSetSize) <= bootstrap ) { if ( bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER)); + bootstrapVCFOutput.add(new VariantContextBuilder(validation).filters(BOOTSTRAP_FILTER).make()); } bootstrapSetSize++; return true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 409e180ae..049b92084 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -36,10 +36,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -225,7 +222,7 @@ public class VariantFiltrationWalker extends RodWalker { (vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied Set filters = new LinkedHashSet(vc.getFilters()); filters.add(MASK_NAME); - vc = VariantContext.modifyFilters(vc, filters); + vc = new VariantContextBuilder(vc).filters(filters).make(); } FiltrationContext varContext = new FiltrationContext(ref, vc); @@ -268,7 +265,7 @@ public class VariantFiltrationWalker extends RodWalker { (vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied Set filters = new LinkedHashSet(vc.getFilters()); filters.add(MASK_NAME); - vc = VariantContext.modifyFilters(vc, filters); + vc = new VariantContextBuilder(vc).filters(filters).make(); } return vc; @@ -325,7 +322,7 @@ public class VariantFiltrationWalker extends RodWalker { VariantContext filteredVC; if ( genotypes == null ) - filteredVC = VariantContext.modifyFilters(vc, filters); + filteredVC = new VariantContextBuilder(vc).filters(filters).make(); else filteredVC = new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 6dc31edb8..60513ca5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -35,10 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -111,7 +108,7 @@ public class UGCallVariants extends RodWalker { try { Map attrs = new HashMap(value.getAttributes()); VariantContextUtils.calculateChromosomeCounts(value, attrs, true); - writer.add(VariantContext.modifyAttributes(value, attrs)); + writer.add(new VariantContextBuilder(value).attributes(attrs).make()); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 5692c2525..0fe7e1fc2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -229,8 +229,7 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContext("UG_call", VCFConstants.EMPTY_ID_FIELD, vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), VariantContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase()); - + vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).make(); } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) @@ -238,7 +237,7 @@ public class UnifiedGenotyperEngine { Set alleles = new HashSet(); alleles.add(Allele.create(ref.getBase(), true)); - vc = new VariantContext("UG_call", VCFConstants.EMPTY_ID_FIELD, ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles); + vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make(); } if ( annotationEngine != null ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index b935600b2..2f15c165f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -33,10 +33,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.File; import java.io.FileNotFoundException; @@ -186,7 +183,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { Map addedAttribs = vcMergeRule.addToMergedAttributes(vcfrWaitingToMerge.vc, vc); addedAttribs.putAll(mergedVc.getAttributes()); - mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs); + mergedVc = new VariantContextBuilder(mergedVc).attributes(addedAttribs).make(); vcfrWaitingToMerge = new VCFRecord(mergedVc, true); numMergedRecords++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 0b28459d4..088ff4c71 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -309,7 +309,7 @@ public class PhaseByTransmission extends RodWalker { genotypesContext.add(phasedMother, phasedFather, phasedChild); } - VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypesContext); + VariantContext newvc = new VariantContextBuilder(vc).genotypes(genotypesContext).make(); vcfWriter.add(newvc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index fddef5129..cac171948 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -135,7 +135,7 @@ class PhasingUtils { mergedAttribs = new HashMap(mergedVc.getAttributes()); VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true); - mergedVc = VariantContext.modifyAttributes(mergedVc, mergedAttribs); + mergedVc = new VariantContextBuilder(mergedVc).attributes(mergedAttribs).make(); return mergedVc; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index 8f9f3f1af..f370e2818 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.Map; @@ -465,7 +466,7 @@ public class GenotypeAndValidateWalker extends RodWalker implements Tr for ( VariantContext eval : evalSetBySample ) { // deal with ancestral alleles if requested if ( eval != null && aastr != null ) { - HashMap newAts = new HashMap(eval.getAttributes()); - newAts.put("ANCESTRALALLELE", aastr); - eval = VariantContext.modifyAttributes(eval, newAts); + eval = new VariantContextBuilder(eval).attribute("ANCESTRALALLELE", aastr).make(); } // for each comp track diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 24caed549..6da693c7a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.lang.reflect.Field; @@ -289,7 +290,7 @@ public class VariantEvalUtils { } VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true); - vcsub = VariantContext.modifyAttributes(vcsub, newAts); + vcsub = new VariantContextBuilder(vcsub).attributes(newAts).make(); //VariantEvalWalker.logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 1d5493daf..b1b8fa46d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.io.File; import java.io.FileNotFoundException; @@ -203,8 +204,9 @@ public class ApplyRecalibration extends RodWalker { for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) { if( vc != null ) { if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); String filterString = null; - final Map attrs = new HashMap(vc.getAttributes()); + final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); final String worstAnnotation = (String) annotationMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); if( lod == null ) { @@ -212,8 +214,8 @@ public class ApplyRecalibration extends RodWalker { } // Annotate the new record with its VQSLOD and the worst performing annotation - attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); - attrs.put(VariantRecalibrator.CULPRIT_KEY, worstAnnotation); + builder.attribute(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); + builder.attribute(VariantRecalibrator.CULPRIT_KEY, worstAnnotation); for( int i = tranches.size() - 1; i >= 0; i-- ) { final Tranche tranche = tranches.get(i); @@ -232,11 +234,10 @@ public class ApplyRecalibration extends RodWalker { } if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { - final Set filters = new HashSet(); - filters.add(filterString); - vc = VariantContext.modifyFilters(vc, filters); + builder.filters(filterString); } - vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs) ); + + vcfWriter.add( builder.make() ); } else { // valid VC but not compatible with this mode, so just emit the variant untouched vcfWriter.add( vc ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 573e15971..d74f5a269 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -252,7 +253,7 @@ public class CombineVariants extends RodWalker { HashMap attributes = new HashMap(mergedVC.getAttributes()); // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false); - VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes); + VariantContext annotatedMergedVC = new VariantContextBuilder(mergedVC).attributes(attributes).make(); if ( minimalVCF ) annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY)); vcfWriter.add(annotatedMergedVC); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 4b3271ba6..f357f8a40 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -38,10 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -161,7 +158,7 @@ public class LeftAlignVariants extends RodWalker { // update if necessary and write if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) { int difference = originalIndex - newCigar.getCigarElement(0).getLength(); - VariantContext newVC = VariantContext.modifyLocation(vc, vc.getChr(), vc.getStart()-difference, vc.getEnd()-difference); + VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); int indelIndex = originalIndex-difference; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index a932d44ed..50fafa202 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; @@ -117,16 +118,15 @@ public class LiftoverVariants extends RodWalker { vc = VariantContextUtils.reverseComplement(vc); } - vc = VariantContext.modifyLocation(vc, toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length); + vc = new VariantContextBuilder(vc).loc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length).make(); if ( RECORD_ORIGINAL_LOCATION ) { - HashMap attrs = new HashMap(vc.getAttributes()); - attrs.put("OriginalChr", fromInterval.getSequence()); - attrs.put("OriginalStart", fromInterval.getStart()); - vc = VariantContext.modifyAttributes(vc, attrs); + vc = new VariantContextBuilder(vc) + .attribute("OriginalChr", fromInterval.getSequence()) + .attribute("OriginalStart", fromInterval.getStart()).make(); } - VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, false); + VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false); if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 7f6e605e7..be9a193d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -654,16 +654,12 @@ public class SelectVariants extends RodWalker { if ( samples == null || samples.isEmpty() ) return vc; -// logger.info("Genotypes in full vc: " + vc.getGenotypes()); -// logger.info("My own sub : " + vc.getGenotypes().subsetToSamples(samples)); - VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); -// logger.info("Genotypes in sub vc: " + sub.getGenotypes()); + final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); + VariantContextBuilder builder = new VariantContextBuilder(sub); // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) - sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes())); - - HashMap attributes = new HashMap(sub.getAttributes()); + builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes())); int depth = 0; for (String sample : sub.getSampleNames()) { @@ -680,22 +676,19 @@ public class SelectVariants extends RodWalker { if (KEEP_ORIGINAL_CHR_COUNTS) { - if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) - attributes.put("AC_Orig",attributes.get(VCFConstants.ALLELE_COUNT_KEY)); - if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) - attributes.put("AF_Orig",attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY)); - if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) ) - attributes.put("AN_Orig",attributes.get(VCFConstants.ALLELE_NUMBER_KEY)); - + if ( sub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) + builder.attribute("AC_Orig",sub.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); + if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) + builder.attribute("AF_Orig",sub.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)); + if ( sub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) + builder.attribute("AN_Orig",sub.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); } - VariantContextUtils.calculateChromosomeCounts(sub,attributes,false); + Map attributes = new HashMap(builder.make().getAttributes()); + VariantContextUtils.calculateChromosomeCounts(sub, attributes, false); attributes.put("DP", depth); - sub = VariantContext.modifyAttributes(sub, attributes); - -// logger.info("Genotypes in final vc: " + sub.getGenotypes()); - return sub; + return new VariantContextBuilder(builder.make()).attributes(attributes).make(); } private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 4e6cc722d..79bbea29d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; @@ -227,24 +228,24 @@ public class VariantValidationAssessor extends RodWalker numHomVarViolations++; isViolation = true; } - vContext = VariantContext.modifyFilters(vContext, filters); + + VariantContextBuilder builder = new VariantContextBuilder(vContext).filters(filters); numRecords++; // add the info fields - HashMap infoMap = new HashMap(); - infoMap.put("NoCallPct", String.format("%.1f", 100.0*noCallProp)); - infoMap.put("HomRefPct", String.format("%.1f", 100.0*homRefProp)); - infoMap.put("HomVarPct", String.format("%.1f", 100.0*homVarProp)); - infoMap.put("HetPct", String.format("%.1f", 100.0*hetProp)); - infoMap.put("HW", String.format("%.2f", hwScore)); + builder.attribute("NoCallPct", String.format("%.1f", 100.0*noCallProp)); + builder.attribute("HomRefPct", String.format("%.1f", 100.0*homRefProp)); + builder.attribute("HomVarPct", String.format("%.1f", 100.0*homVarProp)); + builder.attribute("HetPct", String.format("%.1f", 100.0*hetProp)); + builder.attribute("HW", String.format("%.2f", hwScore)); Collection altAlleles = vContext.getAlternateAlleles(); int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getChromosomeCount(altAlleles.iterator().next()); if ( !isViolation && altAlleleCount > 0 ) numTrueVariants++; - infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount)); - infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount())); + builder.attribute(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount)); + builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount())); - return VariantContext.modifyAttributes(vContext, infoMap); + return builder.make(); } private double hardyWeinbergCalculation(VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 78cfde1bd..f5928b723 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -121,22 +121,22 @@ public class VariantsToVCF extends RodWalker { Collection contexts = getVariantContexts(tracker, ref); for ( VariantContext vc : contexts ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); if ( rsID != null && vc.emptyID() ) { - vc = VariantContext.modifyID(vc, rsID); + builder.id(rsID).make(); } // set the appropriate sample name if necessary if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); - GenotypesContext genotypes = GenotypesContext.create(g); - vc = VariantContext.modifyGenotypes(vc, genotypes); + builder.genotypes(g); } if ( fixReferenceBase ) { - vc = VariantContext.modifyReferencePadding(vc, ref.getBase()); + builder.referenceBaseForIndel(ref.getBase()); } - writeRecord(vc, tracker, ref.getLocus()); + writeRecord(builder.make(), tracker, ref.getLocus()); } return 1; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 725cc8109..ba138a9da 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.io.*; import java.util.*; @@ -252,29 +253,30 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @return a variant context object */ private VariantContext parseVCFLine(String[] parts) { + VariantContextBuilder builder = new VariantContextBuilder(); // increment the line count lineNo++; // parse out the required fields - String contig = getCachedString(parts[0]); + builder.chr(getCachedString(parts[0])); int pos = Integer.valueOf(parts[1]); - String id = null; + builder.start(pos); + if ( parts[2].length() == 0 ) generateException("The VCF specification requires a valid ID field"); else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) ) - id = VCFConstants.EMPTY_ID_FIELD; + builder.noID(); else - id = parts[2]; + builder.id(parts[2]); + String ref = getCachedString(parts[3].toUpperCase()); String alts = getCachedString(parts[4].toUpperCase()); - Double qual = parseQual(parts[5]); - String filter = getCachedString(parts[6]); - String info = new String(parts[7]); + builder.negLog10PError(parseQual(parts[5])); + builder.filters(parseFilters(getCachedString(parts[6]))); + builder.attributes(parseInfo(parts[7])); // get our alleles, filters, and setup an attribute map List alleles = parseAlleles(ref, alts, lineNo); - Set filters = parseFilters(filter); - Map attributes = parseInfo(info); // find out our current location, and clip the alleles down to their minimum length int loc = pos; @@ -286,16 +288,19 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo); alleles = newAlleles; } + builder.stop(loc); + builder.alleles(alleles); // do we have genotyping data if (parts.length > NUM_STANDARD_FIELDS) { - attributes.put(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8])); - attributes.put(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this); + builder.attribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8])); + builder.attribute(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this); } VariantContext vc = null; try { - vc = new VariantContext(name, id, contig, pos, loc, alleles, qual, filters, attributes, ref.getBytes()[0]); + builder.referenceBaseForIndel(ref.getBytes()[0]); + vc = builder.make(); } catch (Exception e) { generateException(e.getMessage()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 63c61cfaa..37137f716 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -25,18 +25,10 @@ package org.broadinstitute.sting.utils.codecs.vcf; import net.sf.samtools.SAMSequenceDictionary; -import org.broad.tribble.Tribble; import org.broad.tribble.TribbleException; -import org.broad.tribble.index.DynamicIndexCreator; -import org.broad.tribble.index.Index; -import org.broad.tribble.index.IndexFactory; -import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.ParsingUtils; -import org.broad.tribble.util.PositionalStream; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; import java.lang.reflect.Array; @@ -164,10 +156,10 @@ public class StandardVCFWriter extends IndexingVCFWriter { throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName()); if ( doNotWriteGenotypes ) - vc = VariantContext.modifyGenotypes(vc, null); + vc = new VariantContextBuilder(vc).noGenotypes().make(); try { - vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false); + vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false); super.add(vc); Map alleleMap = new HashMap(vc.getAlleles().size()); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java index 57edbbfcc..4ffc8e966 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java @@ -14,12 +14,12 @@ import java.util.*; final class CommonInfo { public static final double NO_NEG_LOG_10PERROR = -1.0; - private static Set NO_FILTERS = Collections.unmodifiableSet(new HashSet()); + private static Set NO_FILTERS = Collections.emptySet(); private static Map NO_ATTRIBUTES = Collections.unmodifiableMap(new HashMap()); private double negLog10PError = NO_NEG_LOG_10PERROR; private String name = null; - private Set filters = NO_FILTERS; + private Set filters = null; private Map attributes = NO_ATTRIBUTES; public CommonInfo(String name, double negLog10PError, Set filters, Map attributes) { @@ -56,12 +56,20 @@ final class CommonInfo { // // --------------------------------------------------------------------------------------------------------- + public Set getFiltersMaybeNull() { + return filters; + } + public Set getFilters() { - return Collections.unmodifiableSet(filters); + return filters == null ? NO_FILTERS : Collections.unmodifiableSet(filters); + } + + public boolean filtersWereApplied() { + return filters != null; } public boolean isFiltered() { - return filters.size() > 0; + return filters == null ? false : filters.size() > 0; } public boolean isNotFiltered() { @@ -69,8 +77,8 @@ final class CommonInfo { } public void addFilter(String filter) { - if ( filters == NO_FILTERS ) // immutable -> mutable - filters = new HashSet(filters); + if ( filters == null ) // immutable -> mutable + filters = new HashSet(); if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this); if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this); @@ -83,15 +91,6 @@ final class CommonInfo { addFilter(f); } - public void clearFilters() { - filters = new HashSet(); - } - - public void setFilters(Collection filters) { - clearFilters(); - addFilters(filters); - } - // --------------------------------------------------------------------------------------------------------- // // Working with log error rates diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 28f2d85fc..f1574cec2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -23,7 +23,6 @@ public class Genotype { protected Type type = null; protected boolean isPhased = false; - protected boolean filtersWereAppliedToContext; public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null); @@ -35,7 +34,6 @@ public class Genotype { commonInfo = new CommonInfo(sampleName, negLog10PError, filters, attributes); if ( log10Likelihoods != null ) commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); - filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; validate(); } @@ -333,9 +331,10 @@ public class Genotype { // --------------------------------------------------------------------------------------------------------- public String getSampleName() { return commonInfo.getName(); } public Set getFilters() { return commonInfo.getFilters(); } + public Set getFiltersMaybeNull() { return commonInfo.getFiltersMaybeNull(); } public boolean isFiltered() { return commonInfo.isFiltered(); } public boolean isNotFiltered() { return commonInfo.isNotFiltered(); } - public boolean filtersWereApplied() { return filtersWereAppliedToContext; } + public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); } public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); } public double getNegLog10PError() { return commonInfo.getNegLog10PError(); } public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 7798e259c..455a9b997 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils.variantcontext; +import org.apache.commons.lang.Validate; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; @@ -200,15 +201,29 @@ public class VariantContext implements Feature { // to enable tribble intergrati // set to the alt allele when biallelic, otherwise == null private Allele ALT = null; - // were filters applied? - final private boolean filtersWereAppliedToContext; + /* cached monomorphic value: null -> not yet computed, False, True */ + private Boolean monomorphic = null; // --------------------------------------------------------------------------------------------------------- // - // constructors + // validation mode // // --------------------------------------------------------------------------------------------------------- + public enum Validation { + REF_PADDING, + ALLELES, + GENOTYPES + } + + private final static EnumSet ALL_VALIDATION = EnumSet.allOf(Validation.class); + private final static EnumSet NO_VALIDATION = EnumSet.noneOf(Validation.class); + + // --------------------------------------------------------------------------------------------------------- + // + // constructors: see VariantContextBuilder + // + // --------------------------------------------------------------------------------------------------------- /** * the complete constructor. Makes a complete VariantContext from its arguments @@ -224,14 +239,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes * @param referenceBaseForIndel padded reference base + * + * @deprecated replaced by {@link VariantContextBuilder} */ - public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, ID, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true); - } - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel); + public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { + this(source, ID, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, ALL_VALIDATION); } @@ -247,66 +260,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param negLog10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes + * + * @deprecated replaced by {@link VariantContextBuilder} */ + @Deprecated public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, ID, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true); - } - - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes); - } - - /** - * Makes a VariantContext from its arguments without parsing the genotypes. - * Note that this constructor assumes that if there is genotype data, then it's been put into - * the attributes with the UNPARSED_GENOTYPE_MAP_KEY and that the codec has been added with the - * UNPARSED_GENOTYPE_PARSER_KEY. It doesn't validate that this is the case because it's possible - * that there is no genotype data. - * - * @param source source - * @param contig the contig - * @param start the start base (one based) - * @param stop the stop reference base (one based) - * @param alleles alleles - * @param negLog10PError qual - * @param filters filters: use null for unfiltered and empty set for passes filters - * @param attributes attributes - * @param referenceBaseForIndel padded reference base - */ - public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, ID, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true); - } - - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles, negLog10PError, filters, attributes, referenceBaseForIndel); - } - - /** - * Create a new VariantContext - * - * @param source source - * @param contig the contig - * @param start the start base (one based) - * @param stop the stop reference base (one based) - * @param alleles alleles - * @param genotypes genotypes set - * @param negLog10PError qual - * @param filters filters: use null for unfiltered and empty set for passes filters - * @param attributes attributes - */ - public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, ID, contig, start, stop, alleles, - GenotypesContext.copy(genotypes), - negLog10PError, filters, attributes, null, false, true); - } - - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles, - GenotypesContext.copy(genotypes), - negLog10PError, filters, attributes); + this(source, ID, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, ALL_VALIDATION); } /** @@ -317,33 +276,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param start the start base (one based) * @param stop the stop reference base (one based) * @param alleles alleles - */ - public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles) { - this(source, ID, contig, start, stop, alleles, NO_GENOTYPES, CommonInfo.NO_NEG_LOG_10PERROR, null, null, null, false, true); - } - - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles); - } - - /** - * Create a new variant context with genotypes but without Perror, filters, and attributes * - * @param source source - * @param contig the contig - * @param start the start base (one based) - * @param stop the stop reference base (one based) - * @param alleles alleles - * @param genotypes genotypes + * @deprecated replaced by {@link VariantContextBuilder} */ - public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, Collection genotypes) { - this(source, ID, contig, start, stop, alleles, genotypes, CommonInfo.NO_NEG_LOG_10PERROR, null, null); - } - @Deprecated - public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes) { - this(source, VCFConstants.EMPTY_ID_FIELD, contig, start, stop, alleles, genotypes, CommonInfo.NO_NEG_LOG_10PERROR, null, null); + public VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles) { + this(source, ID, contig, start, stop, alleles, NO_GENOTYPES, CommonInfo.NO_NEG_LOG_10PERROR, null, null, null, false, ALL_VALIDATION); } /** @@ -351,8 +289,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati * * @param other the VariantContext to copy */ - public VariantContext(VariantContext other) { - this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true); + protected VariantContext(VariantContext other) { + this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, NO_VALIDATION); } /** @@ -369,14 +307,14 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes * @param referenceBaseForIndel padded reference base * @param genotypesAreUnparsed true if the genotypes have not yet been parsed - * @param performValidation if true, call validate() as the final step in construction + * @param validationToPerform set of validation steps to take */ - private VariantContext(String source, String ID, + protected VariantContext(String source, String ID, String contig, long start, long stop, Collection alleles, GenotypesContext genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel, boolean genotypesAreUnparsed, - boolean performValidation ) { + EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } this.contig = contig; this.start = start; @@ -398,7 +336,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati } this.commonInfo = new CommonInfo(source, negLog10PError, filters, attributes); - filtersWereAppliedToContext = filters != null; REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; // todo -- remove me when this check is no longer necessary @@ -426,69 +363,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati } } - if ( performValidation ) { - validate(); + if ( ! validationToPerform.isEmpty() ) { + validate(validationToPerform); } } - // --------------------------------------------------------------------------------------------------------- - // - // Partial-cloning routines (because Variant Context is immutable). - // - // IMPORTANT: These routines assume that the VariantContext on which they're called is already valid. - // Due to this assumption, they explicitly tell the constructor NOT to perform validation by - // calling validate(), and instead perform validation only on the data that's changed. - // - // Note that we don't call vc.getGenotypes() because that triggers the lazy loading. - // Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it. - // - // --------------------------------------------------------------------------------------------------------- - - public static VariantContext modifyGenotypes(VariantContext vc, GenotypesContext genotypes) { - VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), vc.getReferenceBaseForIndel(), false, false); - modifiedVC.validateGenotypes(); - return modifiedVC; - } - - public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) { - VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getID(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), vc.getReferenceBaseForIndel(), true, false); - - // Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(), - // since those validation routines rely on the values of start and end: - modifiedVC.validateAlleles(); - modifiedVC.validateReferencePadding(); - - return modifiedVC; - } - - public static VariantContext modifyFilters(VariantContext vc, Set filters) { - return new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false); - } - - public static VariantContext modifyAttributes(VariantContext vc, Map attributes) { - return new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false); - } - - public static VariantContext modifyAttribute(VariantContext vc, final String key, final Object value) { - Map attributes = new HashMap(vc.getAttributes()); - attributes.put(key, value); - return new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false); - } - - public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) { - VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false); - modifiedVC.validateReferencePadding(); - return modifiedVC; - } - - public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) { - return new VariantContext(vc.getSource(), vc.getID(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false); - } - - public static VariantContext modifyID(final VariantContext vc, final String id) { - return new VariantContext(vc.getSource(), id, vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false); - } - // --------------------------------------------------------------------------------------------------------- // // Selectors @@ -771,10 +650,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- public String getSource() { return commonInfo.getName(); } + public Set getFiltersMaybeNull() { return commonInfo.getFiltersMaybeNull(); } public Set getFilters() { return commonInfo.getFilters(); } public boolean isFiltered() { return commonInfo.isFiltered(); } public boolean isNotFiltered() { return commonInfo.isNotFiltered(); } - public boolean filtersWereApplied() { return filtersWereAppliedToContext; } + public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); } public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); } public double getNegLog10PError() { return commonInfo.getNegLog10PError(); } public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); } @@ -1092,7 +972,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return true if it's monomorphic */ public boolean isMonomorphic() { - return ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount()); + if ( monomorphic == null ) + monomorphic = ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount()); + return monomorphic; } /** @@ -1301,23 +1183,14 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- - /** - * To be called by any modifying routines - */ - private boolean validate() { - return validate(true); - } - - private boolean validate(boolean throwException) { - try { - validateReferencePadding(); - validateAlleles(); - validateGenotypes(); - } catch ( IllegalArgumentException e ) { - if ( throwException ) - throw e; - else - return false; + private boolean validate(final EnumSet validationToPerform) { + for (final Validation val : validationToPerform ) { + switch (val) { + case ALLELES: validateAlleles(); break; + case REF_PADDING: validateReferencePadding(); break; + case GENOTYPES: validateGenotypes(); break; + default: throw new IllegalArgumentException("Unexpected validation mode " + val); + } } return true; @@ -1512,8 +1385,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati return (int)stop; } - private boolean hasSymbolicAlleles() { - for (Allele a: getAlleles()) { + public boolean hasSymbolicAlleles() { + for (final Allele a: getAlleles()) { if (a.isSymbolic()) { return true; } @@ -1521,84 +1394,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati return false; } - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { - - // see if we need to pad common reference base from all alleles - boolean padVC; - - // We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext. - // This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention). - long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; - if (inputVC.hasSymbolicAlleles()) - padVC = true; - else if (inputVC.getReference().length() == locLength) - padVC = false; - else if (inputVC.getReference().length() == locLength-1) - padVC = true; - else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + - " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); - - // nothing to do if we don't need to pad bases - if (padVC) { - - if ( !inputVC.hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); - - Byte refByte = inputVC.getReferenceBaseForIndel(); - - List alleles = new ArrayList(); - - for (Allele a : inputVC.getAlleles()) { - // get bases for current allele and create a new one with trimmed bases - if (a.isSymbolic()) { - alleles.add(a); - } else { - String newBases; - if ( refBaseShouldBeAppliedToEndOfAlleles ) - newBases = a.getBaseString() + new String(new byte[]{refByte}); - else - newBases = new String(new byte[]{refByte}) + a.getBaseString(); - alleles.add(Allele.create(newBases,a.isReference())); - } - } - - // now we can recreate new genotypes with trimmed alleles - GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); - for (final Genotype g : inputVC.getGenotypes() ) { - List inAlleles = g.getAlleles(); - List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); - for (Allele a : inAlleles) { - if (a.isCalled()) { - if (a.isSymbolic()) { - newGenotypeAlleles.add(a); - } else { - String newBases; - if ( refBaseShouldBeAppliedToEndOfAlleles ) - newBases = a.getBaseString() + new String(new byte[]{refByte}); - else - newBases = new String(new byte[]{refByte}) + a.getBaseString(); - newGenotypeAlleles.add(Allele.create(newBases,a.isReference())); - } - } - else { - // add no-call allele - newGenotypeAlleles.add(Allele.NO_CALL); - } - } - genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getNegLog10PError(), - g.getFilters(), g.getAttributes(), g.isPhased())); - - } - - // Do not change the filter state if filters were not applied to this context - Set inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null; - return new VariantContext(inputVC.getSource(), inputVC.getID(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes(),refByte); - } - else - return inputVC; - - } - public Allele getAltAlleleWithHighestAlleleCount() { // first idea: get two alleles with highest AC Allele best = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java new file mode 100644 index 000000000..fb92f60a2 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.variantcontext; + +import com.google.java.contract.Requires; +import org.broad.tribble.Feature; +import org.broad.tribble.TribbleException; +import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.*; + +/** + * Builder class for VariantContext + * + * @author depristo + */ +public class VariantContextBuilder { + // required fields + private String source = null; + private String contig = null; + private long start = -1; + private long stop = -1; + private Collection alleles = null; + + // optional -> these are set to the appropriate default value + private String ID = VCFConstants.EMPTY_ID_FIELD; + private GenotypesContext genotypes = GenotypesContext.NO_GENOTYPES; + private double negLog10PError = VariantContext.NO_NEG_LOG_10PERROR; + private Set filters = null; + private Map attributes = null; + private boolean attributesCanBeModified = false; + private Byte referenceBaseForIndel = null; + private boolean genotypesAreUnparsed = false; + + /** enum of what must be validated */ + final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); + + public VariantContextBuilder() { + + } + + public VariantContextBuilder(String source, String contig, long start, long stop, Collection alleles) { + this.source = source; + this.contig = contig; + this.start = start; + this.stop = stop; + this.alleles = alleles; + toValidate.add(VariantContext.Validation.ALLELES); + } + + /** + * Returns a new builder based on parent -- the new VC will have all fields initialized + * to their corresponding values in parent. This is the best way to create a derived VariantContext + * + * @param parent + */ + public VariantContextBuilder(VariantContext parent) { + this.alleles = parent.alleles; + this.attributes = parent.getAttributes(); + this.attributesCanBeModified = false; + this.contig = parent.contig; + this.filters = parent.getFiltersMaybeNull(); + this.genotypes = parent.genotypes; + this.genotypesAreUnparsed = parent.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY); + this.ID = parent.getID(); + this.negLog10PError = parent.getNegLog10PError(); + this.referenceBaseForIndel = parent.getReferenceBaseForIndel(); + this.source = parent.getSource(); + this.start = parent.getStart(); + this.stop = parent.getEnd(); + } + + @Requires({"alleles != null", "!alleles.isEmpty()"}) + public VariantContextBuilder alleles(final Collection alleles) { + this.alleles = alleles; + toValidate.add(VariantContext.Validation.ALLELES); + return this; + } + + /** + * Attributes can be null -> meaning there are no attributes. After + * calling this routine the builder assumes it can modify the attributes + * object here, if subsequent calls are made to set attribute values + * @param attributes + */ + public VariantContextBuilder attributes(final Map attributes) { + this.attributes = attributes; + this.attributesCanBeModified = true; + return this; + } + + public VariantContextBuilder attribute(final String key, final Object value) { + if ( ! attributesCanBeModified ) { + this.attributesCanBeModified = true; + this.attributes = new HashMap(); + } + attributes.put(key, value); + return this; + } + + /** + * filters can be null -> meaning there are no filters + * @param filters + */ + public VariantContextBuilder filters(final Set filters) { + this.filters = filters; + return this; + } + + public VariantContextBuilder filters(final String ... filters) { + filters(new HashSet(Arrays.asList(filters))); + return this; + } + + public VariantContextBuilder passFilters() { + return filters(VariantContext.PASSES_FILTERS); + } + + public VariantContextBuilder unfiltered() { + this.filters = null; + return this; + } + + /** + * genotypes can be null -> meaning there are no genotypes + * @param genotypes + */ + public VariantContextBuilder genotypes(final GenotypesContext genotypes) { + this.genotypes = genotypes; + if ( genotypes != null ) + toValidate.add(VariantContext.Validation.GENOTYPES); + return this; + } + + public VariantContextBuilder genotypes(final Collection genotypes) { + return genotypes(GenotypesContext.copy(genotypes)); + } + + public VariantContextBuilder genotypes(final Genotype ... genotypes) { + return genotypes(GenotypesContext.copy(Arrays.asList(genotypes))); + } + + public VariantContextBuilder noGenotypes() { + this.genotypes = null; + return this; + } + + public VariantContextBuilder genotypesAreUnparsed(final boolean genotypesAreUnparsed) { + this.genotypesAreUnparsed = genotypesAreUnparsed; + return this; + } + + @Requires("ID != null") + public VariantContextBuilder id(final String ID) { + this.ID = ID; + return this; + } + + public VariantContextBuilder noID() { + return id(VCFConstants.EMPTY_ID_FIELD); + } + + @Requires("negLog10PError <= 0") + public VariantContextBuilder negLog10PError(final double negLog10PError) { + this.negLog10PError = negLog10PError; + return this; + } + + /** + * Null means no refBase is available + * @param referenceBaseForIndel + */ + public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) { + this.referenceBaseForIndel = referenceBaseForIndel; + toValidate.add(VariantContext.Validation.REF_PADDING); + return this; + } + + @Requires("source != null") + public VariantContextBuilder source(final String source) { + this.source = source; + return this; + } + + @Requires({"contig != null", "start >= 0", "stop >= 0"}) + public VariantContextBuilder loc(final String contig, final long start, final long stop) { + this.contig = contig; + this.start = start; + this.stop = stop; + toValidate.add(VariantContext.Validation.ALLELES); + toValidate.add(VariantContext.Validation.REF_PADDING); + return this; + } + + @Requires({"contig != null", "start >= 0", "stop >= 0"}) + public VariantContextBuilder chr(final String contig) { + this.contig = contig; + return this; + } + + @Requires({"start >= 0"}) + public VariantContextBuilder start(final long start) { + this.start = start; + toValidate.add(VariantContext.Validation.ALLELES); + toValidate.add(VariantContext.Validation.REF_PADDING); + return this; + } + + @Requires({"stop >= 0"}) + public VariantContextBuilder stop(final long stop) { + this.stop = stop; + return this; + } + + public VariantContext make() { + return new VariantContext(source, ID, contig, start, stop, alleles, + genotypes, negLog10PError, filters, attributes, + referenceBaseForIndel, genotypesAreUnparsed, toValidate); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5d2c86f84..d9057ea8f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -116,6 +116,82 @@ public class VariantContextUtils { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); } + public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { + // see if we need to pad common reference base from all alleles + boolean padVC; + + // We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext. + // This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention). + long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; + if (inputVC.hasSymbolicAlleles()) + padVC = true; + else if (inputVC.getReference().length() == locLength) + padVC = false; + else if (inputVC.getReference().length() == locLength-1) + padVC = true; + else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); + + // nothing to do if we don't need to pad bases + if (padVC) { + if ( !inputVC.hasReferenceBaseForIndel() ) + throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); + + Byte refByte = inputVC.getReferenceBaseForIndel(); + + List alleles = new ArrayList(); + + for (Allele a : inputVC.getAlleles()) { + // get bases for current allele and create a new one with trimmed bases + if (a.isSymbolic()) { + alleles.add(a); + } else { + String newBases; + if ( refBaseShouldBeAppliedToEndOfAlleles ) + newBases = a.getBaseString() + new String(new byte[]{refByte}); + else + newBases = new String(new byte[]{refByte}) + a.getBaseString(); + alleles.add(Allele.create(newBases,a.isReference())); + } + } + + // now we can recreate new genotypes with trimmed alleles + GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); + for (final Genotype g : inputVC.getGenotypes() ) { + List inAlleles = g.getAlleles(); + List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); + for (Allele a : inAlleles) { + if (a.isCalled()) { + if (a.isSymbolic()) { + newGenotypeAlleles.add(a); + } else { + String newBases; + if ( refBaseShouldBeAppliedToEndOfAlleles ) + newBases = a.getBaseString() + new String(new byte[]{refByte}); + else + newBases = new String(new byte[]{refByte}) + a.getBaseString(); + newGenotypeAlleles.add(Allele.create(newBases,a.isReference())); + } + } + else { + // add no-call allele + newGenotypeAlleles.add(Allele.NO_CALL); + } + } + genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getNegLog10PError(), + g.getFilters(), g.getAttributes(), g.isPhased())); + + } + + // Do not change the filter state if filters were not applied to this context + Set inputVCFilters = inputVC.getFiltersMaybeNull(); + return new VariantContext(inputVC.getSource(), inputVC.getID(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes(),refByte); + } + else + return inputVC; + + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -257,7 +333,7 @@ public class VariantContextUtils { @Requires("vc != null") @Ensures("result != null") public static VariantContext sitesOnlyVariantContext(VariantContext vc) { - return VariantContext.modifyGenotypes(vc, null); + return new VariantContextBuilder(vc).noGenotypes().make(); } /** @@ -378,7 +454,7 @@ public class VariantContextUtils { for (VariantContext vc : prepaddedVCs) { // also a reasonable place to remove filtered calls, if needed if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc, false)); + VCs.add(createVariantContextWithPaddedAlleles(vc, false)); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; @@ -896,7 +972,7 @@ public class VariantContextUtils { newGenotypes.add(Genotype.modifyAttributes(genotype, attrs)); } - return VariantContext.modifyGenotypes(vc, newGenotypes); + return new VariantContextBuilder(vc).genotypes(newGenotypes).make(); } public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java deleted file mode 100755 index 6bb764f44..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.qc; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; -import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.PrintStream; -import java.util.*; - -/** - * Test routine for new VariantContext object - */ -@Reference(window=@Window(start=-20,stop=1)) -public class TestVariantContextWalker extends RodWalker { - @Output - PrintStream out; - - @ArgumentCollection - protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - - @Argument(fullName="takeFirstOnly", doc="Only take the first second at a locus, as opposed to all", required=false) - boolean takeFirstOnly = false; - - @Argument(fullName="onlyContextsOfType", doc="Only take variant contexts of this type", required=false) - VariantContext.Type onlyOfThisType = null; - - @Argument(fullName="onlyContextsStartinAtCurrentPosition", doc="Only take variant contexts at actually start at the current position, excluding those at span to the current location but start earlier", required=false) - boolean onlyContextsStartinAtCurrentPosition = false; - - @Argument(fullName="printPerLocus", doc="If true, we'll print the variant contexts, in addition to counts", required=false) - boolean printContexts = false; - - @Argument(fullName="outputVCF", doc="If provided, we'll convert the first input context into a VCF", required=false) - VCFWriter writer = null; - - private boolean wroteHeader = false; - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( ref == null ) - return 0; - else { - EnumSet allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType); - - int n = 0; - List contexts; - if ( onlyContextsStartinAtCurrentPosition ) - contexts = tracker.getValues(variantCollection.variants, context.getLocation()); - else // ! onlyContextsStartinAtCurrentPosition - contexts = tracker.getValues(variantCollection.variants); - - for ( VariantContext vc : contexts ) { - if ( allowedTypes == null || allowedTypes.contains(vc.getType()) ) { - // we need to trigger decoding of the genotype string to pass integration tests - vc.getGenotypes(); - - if ( writer != null && n == 0 ) { - if ( ! wroteHeader ) { - writer.writeHeader(createVCFHeader(vc)); - wroteHeader = true; - } - - writer.add(vc); - } - - n++; - if ( printContexts ) out.printf(" %s%n", vc); - if ( takeFirstOnly ) break; - } - } - - if ( n > 0 && printContexts ) { - out.printf("%s => had %d variant context objects%n", context.getLocation(), n); - out.printf("---------------------------------------------%n"); - } - - return n; - } - } - - private static VCFHeader createVCFHeader(VariantContext vc) { - return new VCFHeader(new HashSet(), vc.getGenotypes().getSampleNamesOrderedByName()); - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer point, Integer sum) { - return point + sum; - } - - @Override - public void onTraversalDone(Integer result) { - // Double check traversal result to make count is the same. - // TODO: Is this check necessary? - out.println("[REDUCE RESULT] Traversal result is: " + result); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index fae7cb05a..6bc71a76d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -230,7 +230,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { for ( final Genotype g : vc.getGenotypes() ) { gc.add(new Genotype(g.getSampleName()+"_"+i, g)); } - toMerge.add(VariantContext.modifyGenotypes(vc, gc)); + toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make()); } VariantContextUtils.simpleMerge(b37GenomeLocParser, toMerge, null, diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 5bc72e132..38c4f84ab 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -255,7 +255,7 @@ public class VariantContextUnitTest extends BaseTest { public void testCreatingPartiallyCalledGenotype() { List alleles = Arrays.asList(Aref, C); Genotype g = new Genotype("foo", Arrays.asList(C, Allele.NO_CALL), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, alleles, Arrays.asList(g)); + VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make(); Assert.assertTrue(vc.isSNP()); Assert.assertEquals(vc.getNAlleles(), 2); @@ -328,7 +328,8 @@ public class VariantContextUnitTest extends BaseTest { Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1, g2, g3)); + VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) + .genotypes(g1, g2, g3).make(); Assert.assertTrue(vc.hasGenotypes()); Assert.assertFalse(vc.isMonomorphic()); @@ -367,7 +368,8 @@ public class VariantContextUnitTest extends BaseTest { Genotype g5 = new Genotype("dd", Arrays.asList(del, del), 10); Genotype g6 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1, g2, g3, g4, g5, g6)); + VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) + .genotypes(g1, g2, g3, g4, g5, g6).make(); Assert.assertTrue(vc.hasGenotypes()); Assert.assertFalse(vc.isMonomorphic()); @@ -392,7 +394,8 @@ public class VariantContextUnitTest extends BaseTest { Genotype g1 = new Genotype("AA1", Arrays.asList(Aref, Aref), 10); Genotype g2 = new Genotype("AA2", Arrays.asList(Aref, Aref), 10); Genotype g3 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1, g2, g3)); + VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) + .genotypes(g1, g2, g3).make(); Assert.assertTrue(vc.hasGenotypes()); Assert.assertTrue(vc.isMonomorphic()); @@ -412,21 +415,20 @@ public class VariantContextUnitTest extends BaseTest { Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10); Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1,g2)); + VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1, g2).make(); Assert.assertTrue(vc.isNotFiltered()); Assert.assertFalse(vc.isFiltered()); Assert.assertEquals(0, vc.getFilters().size()); - Set filters = new HashSet(Arrays.asList("BAD_SNP_BAD!")); - vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1,g2), VariantContext.NO_NEG_LOG_10PERROR, filters, null); + vc = new VariantContextBuilder(vc).filters("BAD_SNP_BAD!").make(); Assert.assertFalse(vc.isNotFiltered()); Assert.assertTrue(vc.isFiltered()); Assert.assertEquals(1, vc.getFilters().size()); - filters = new HashSet(Arrays.asList("BAD_SNP_BAD!", "REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE")); - vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop, alleles, Arrays.asList(g1,g2), VariantContext.NO_NEG_LOG_10PERROR, filters, null); + Set filters = new HashSet(Arrays.asList("BAD_SNP_BAD!", "REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE")); + vc = new VariantContextBuilder(vc).filters(filters).make(); Assert.assertFalse(vc.isNotFiltered()); Assert.assertTrue(vc.isFiltered()); @@ -441,7 +443,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); Genotype g4 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); Genotype g5 = new Genotype("--", Arrays.asList(del, del), 10); - VariantContext vc = new VariantContext("test", VCFConstants.EMPTY_ID_FIELD, snpLoc,snpLocStart, snpLocStop , alleles, Arrays.asList(g1,g2,g3,g4,g5)); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make(); VariantContext vc12 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName()))); VariantContext vc1 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName()))); @@ -495,7 +497,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); GenotypesContext gc = GenotypesContext.create(g1, g2, g3); - VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make(); Assert.assertEquals(vc.getGenotype("AA"), g1); Assert.assertEquals(vc.getGenotype("AT"), g2); @@ -586,7 +588,7 @@ public class VariantContextUnitTest extends BaseTest { private SitesAndGenotypesVC(String name, VariantContext original) { super(SitesAndGenotypesVC.class, name); this.vc = original; - this.copy = new VariantContext(original); + this.copy = new VariantContextBuilder(original).make(); } public String toString() { @@ -600,8 +602,8 @@ public class VariantContextUnitTest extends BaseTest { Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); - VariantContext sites = new VariantContext("sites", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)); - VariantContext genotypes = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T), Arrays.asList(g1, g2, g3)); + VariantContext sites = new VariantContextBuilder("sites", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).make(); + VariantContext genotypes = new VariantContextBuilder(sites).source("genotypes").genotypes(g1, g2, g3).make(); new SitesAndGenotypesVC("sites", sites); new SitesAndGenotypesVC("genotypes", genotypes); @@ -616,32 +618,32 @@ public class VariantContextUnitTest extends BaseTest { // -------------------------------------------------------------------------------- @Test(dataProvider = "SitesAndGenotypesVC") public void runModifyVCTests(SitesAndGenotypesVC cfg) { - VariantContext modified = VariantContext.modifyLocation(cfg.vc, "chr2", 123, 123); + VariantContext modified = new VariantContextBuilder(cfg.vc).loc("chr2", 123, 123).make(); Assert.assertEquals(modified.getChr(), "chr2"); Assert.assertEquals(modified.getStart(), 123); Assert.assertEquals(modified.getEnd(), 123); - modified = VariantContext.modifyID(cfg.vc, "newID"); + modified = new VariantContextBuilder(cfg.vc).id("newID").make(); Assert.assertEquals(modified.getID(), "newID"); Set newFilters = Collections.singleton("newFilter"); - modified = VariantContext.modifyFilters(cfg.vc, newFilters); + modified = new VariantContextBuilder(cfg.vc).filters(newFilters).make(); Assert.assertEquals(modified.getFilters(), newFilters); - modified = VariantContext.modifyAttribute(cfg.vc, "AC", 1); + modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make(); Assert.assertEquals(modified.getAttribute("AC"), 1); - modified = VariantContext.modifyAttribute(modified, "AC", 2); + modified = new VariantContextBuilder(modified).attribute("AC", 2).make(); Assert.assertEquals(modified.getAttribute("AC"), 2); - modified = VariantContext.modifyAttributes(modified, null); + modified = new VariantContextBuilder(modified).attributes(null).make(); Assert.assertTrue(modified.getAttributes().isEmpty()); Genotype g1 = new Genotype("AA2", Arrays.asList(Aref, Aref), 10); Genotype g2 = new Genotype("AT2", Arrays.asList(Aref, T), 10); Genotype g3 = new Genotype("TT2", Arrays.asList(T, T), 10); GenotypesContext gc = GenotypesContext.create(g1,g2,g3); - modified = VariantContext.modifyGenotypes(cfg.vc, gc); + modified = new VariantContextBuilder(cfg.vc).genotypes(gc).make(); Assert.assertEquals(modified.getGenotypes(), gc); - modified = VariantContext.modifyGenotypes(cfg.vc, null); + modified = new VariantContextBuilder(cfg.vc).noGenotypes().make(); Assert.assertTrue(modified.getGenotypes().isEmpty()); // test that original hasn't changed @@ -697,7 +699,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); GenotypesContext gc = GenotypesContext.create(g1, g2, g3); - VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make(); VariantContext sub = cfg.updateAlleles ? vc.subContextFromSamples(cfg.samples) : vc.subContextFromSamples(cfg.samples, vc.getAlleles()); // unchanged attributes should be the same @@ -782,8 +784,7 @@ public class VariantContextUnitTest extends BaseTest { gc.add(new Genotype(name, Arrays.asList(Aref, T))); } - VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, - snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make(); // same sample names => success Assert.assertEquals(vc.getSampleNames(), new HashSet(cfg.sampleNames), "vc.getSampleNames() = " + vc.getSampleNames()); @@ -823,9 +824,7 @@ public class VariantContextUnitTest extends BaseTest { } - VariantContext vc = new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, - snpLocStart, snpLocStop, Arrays.asList(Aref, T), gc); - + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make(); Assert.assertEquals(vc.getNSamples(), nSamples); if ( nSamples > 0 ) { Assert.assertEquals(vc.isPolymorphic(), nT > 0); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 805781fe0..a48596ca1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -250,7 +250,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { final List inputs = new ArrayList(); for ( final String id : cfg.inputs ) { - inputs.add(VariantContext.modifyID(snpVC1, id)); + inputs.add(new VariantContextBuilder(snpVC1).id(id).make()); } final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,