Merge pull request #465 from broadinstitute/eb_improvements_to_ref_confidence_merger
Improvements to ref confidence merger
This commit is contained in:
commit
50cd6781b3
|
|
@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
|||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -155,7 +154,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
* These calls should be unfiltered and annotated with the error covariates that are intended to be used for modeling.
|
||||
*/
|
||||
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
|
||||
public List<RodBinding<VariantContext>> input;
|
||||
public List<RodBindingCollection<VariantContext>> inputCollections;
|
||||
final private List<RodBinding<VariantContext>> input = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* These additional calls should be unfiltered and annotated with the error covariates that are intended to be used for modeling.
|
||||
|
|
@ -272,7 +272,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -resource:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
ApplyRecalibration.addVQSRStandardHeaderLines(hInfo);
|
||||
recalWriter.writeHeader( new VCFHeader(hInfo) );
|
||||
|
|
@ -280,6 +279,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
for( int iii = 0; iii < REPLICATE * 2; iii++ ) {
|
||||
replicate.add(GenomeAnalysisEngine.getRandomGenerator().nextDouble());
|
||||
}
|
||||
|
||||
// collect the actual rod bindings into a list for use later
|
||||
for ( final RodBindingCollection<VariantContext> inputCollection : inputCollections )
|
||||
input.addAll(inputCollection.getRodBindings());
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -111,13 +111,12 @@ import java.util.*;
|
|||
@Reference(window=@Window(start=-10,stop=10))
|
||||
public class CombineReferenceCalculationVariants extends RodWalker<VariantContext, VariantContextWriter> implements AnnotatorCompatible, TreeReducible<VariantContextWriter> {
|
||||
|
||||
// TODO -- allow a file of VCF paths to be entered?
|
||||
|
||||
/**
|
||||
* The VCF files to merge together
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="One or more input VCF files", required=true)
|
||||
public List<RodBinding<VariantContext>> variants;
|
||||
public List<RodBindingCollection<VariantContext>> variantCollections;
|
||||
final private List<RodBinding<VariantContext>> variants = new ArrayList<>();
|
||||
|
||||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
|
@ -169,6 +168,10 @@ public class CombineReferenceCalculationVariants extends RodWalker<VariantContex
|
|||
|
||||
// create the annotation engine
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, getToolkit());
|
||||
|
||||
// collect the actual rod bindings into a list for use later
|
||||
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
|
||||
variants.addAll(variantCollection.getRodBindings());
|
||||
}
|
||||
|
||||
public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
|
|
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.*;
|
||||
import java.util.*;
|
||||
|
|
@ -306,6 +308,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @param tags argument tags
|
||||
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||
*/
|
||||
protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) {
|
||||
|
|
@ -409,6 +412,95 @@ public abstract class ArgumentTypeDescriptor {
|
|||
value, fieldName, e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the source of a RodBindingCollection, which can be either a file of RodBindings or an actual RodBinding.
|
||||
*
|
||||
* @param parsingEngine the parsing engine used to validate this argument type descriptor
|
||||
* @param source source
|
||||
* @param type type
|
||||
* @param matches matches
|
||||
* @param tags argument tags
|
||||
* @return the newly created binding object
|
||||
*/
|
||||
public Object parseRodBindingCollectionSource(final ParsingEngine parsingEngine,
|
||||
final ArgumentSource source,
|
||||
final Type type,
|
||||
final ArgumentMatches matches,
|
||||
final Tags tags) {
|
||||
|
||||
final ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
final ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches);
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
String name = defaultDefinition.fullName;
|
||||
|
||||
// if this a list of files, get those bindings
|
||||
final File file = value.asFile();
|
||||
try {
|
||||
if (file.getAbsolutePath().endsWith(".list")) {
|
||||
return getRodBindingsCollection(file, parsingEngine, parameterType, name, tags, source.field.getName());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
|
||||
// otherwise, treat this as an individual binding
|
||||
final RodBinding binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, name, tags, source.field.getName());
|
||||
parsingEngine.addTags(binding, tags);
|
||||
parsingEngine.addRodBinding(binding);
|
||||
return RodBindingCollection.createRodBindingCollectionOfType(parameterType, Arrays.asList(binding));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve and parse a collection of RodBindings from the given file.
|
||||
*
|
||||
* @param file the source file
|
||||
* @param parsingEngine the engine responsible for parsing
|
||||
* @param parameterType the Tribble Feature parameter type
|
||||
* @param bindingName the name of the binding passed to the constructor.
|
||||
* @param defaultTags general tags for the binding used for parsing and passed to the constructor.
|
||||
* @param fieldName the name of the field that was parsed. Used for error reporting.
|
||||
* @return the newly created collection of binding objects.
|
||||
*/
|
||||
public static Object getRodBindingsCollection(final File file,
|
||||
final ParsingEngine parsingEngine,
|
||||
final Class<? extends Feature> parameterType,
|
||||
final String bindingName,
|
||||
final Tags defaultTags,
|
||||
final String fieldName) throws IOException {
|
||||
final List<RodBinding> bindings = new ArrayList<>();
|
||||
|
||||
// parse each line separately using the given Tags if none are provided on each line
|
||||
for ( final String line: FileUtils.readLines(file) ) {
|
||||
final String[] tokens = line.split("\\s+");
|
||||
final RodBinding binding;
|
||||
|
||||
if ( tokens.length == 0 ) {
|
||||
continue; // empty line, so do nothing
|
||||
}
|
||||
// use the default tags if none are provided for this binding
|
||||
else if ( tokens.length == 1 ) {
|
||||
final ArgumentMatchValue value = new ArgumentMatchStringValue(tokens[0]);
|
||||
binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, bindingName, defaultTags, fieldName);
|
||||
parsingEngine.addTags(binding, defaultTags);
|
||||
}
|
||||
// use the new tags if provided
|
||||
else if ( tokens.length == 2 ) {
|
||||
final Tags tags = ParsingMethod.parseTags(fieldName, tokens[0]);
|
||||
final ArgumentMatchValue value = new ArgumentMatchStringValue(tokens[1]);
|
||||
binding = (RodBinding)parseBinding(value, parameterType, RodBinding.class, bindingName, tags, fieldName);
|
||||
parsingEngine.addTags(binding, tags);
|
||||
} else {
|
||||
throw new UserException.BadArgumentValue(fieldName, "data lines should consist of an optional set of tags along with a path to a file; too many tokens are present for line: " + line);
|
||||
}
|
||||
|
||||
bindings.add(binding);
|
||||
parsingEngine.addRodBinding(binding);
|
||||
}
|
||||
|
||||
return RodBindingCollection.createRodBindingCollectionOfType(parameterType, bindings);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -487,14 +579,60 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBindingCollection objects
|
||||
*/
|
||||
class RodBindingCollectionArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* We only want RodBindingCollection class objects
|
||||
* @param type The type to check.
|
||||
* @return true if the provided class is an RodBindingCollection.class
|
||||
*/
|
||||
@Override
|
||||
public boolean supports( final Class type ) {
|
||||
return isRodBindingCollection(type);
|
||||
}
|
||||
|
||||
public static boolean isRodBindingCollection( final Class type ) {
|
||||
return RodBindingCollection.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* See note from RodBindingArgumentTypeDescriptor.parse().
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @return the IntervalBinding object.
|
||||
*/
|
||||
@Override
|
||||
public Object parse(final ParsingEngine parsingEngine, final ArgumentSource source, final Type type, final ArgumentMatches matches) {
|
||||
final Tags tags = getArgumentTags(matches);
|
||||
return parseRodBindingCollectionSource(parsingEngine, source, type, matches, tags);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
||||
* a simple String constructor.
|
||||
*/
|
||||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
|
||||
/**
|
||||
* @param type the class type
|
||||
* @return true if this class is a binding type, false otherwise
|
||||
*/
|
||||
private boolean isBinding(final Class type) {
|
||||
return RodBindingArgumentTypeDescriptor.isRodBinding(type) ||
|
||||
IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ||
|
||||
RodBindingCollectionArgumentTypeDescriptor.isRodBindingCollection(type);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
|
||||
if ( isBinding(type) ) return false;
|
||||
if ( type.isPrimitive() ) return true;
|
||||
if ( type.isEnum() ) return true;
|
||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public final class IntervalBinding<T extends Feature> {
|
|||
|
||||
/**
 * Create an IntervalBinding whose intervals come from a feature source; the arguments are
 * passed straight through to a new RodBinding stored in featureIntervals.
 *
 * @param type        the Feature class of the underlying RodBinding
 * @param rawName     the raw name given to the underlying RodBinding
 * @param source      the source given to the underlying RodBinding
 * @param tribbleType the tribble type given to the underlying RodBinding
 * @param tags        the tags given to the underlying RodBinding
 */
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
    // single assignment only: the pre-change line duplicated this one
    featureIntervals = new RodBinding<>(type, rawName, source, tribbleType, tags);
}
|
||||
|
||||
@Requires({"intervalArgument != null"})
|
||||
|
|
@ -66,9 +66,7 @@ public final class IntervalBinding<T extends Feature> {
|
|||
}
|
||||
|
||||
public String getSource() {
|
||||
if ( featureIntervals != null )
|
||||
return featureIntervals.getSource();
|
||||
return stringIntervals;
|
||||
return ( featureIntervals != null ? featureIntervals.getSource() : stringIntervals );
|
||||
}
|
||||
|
||||
public List<GenomeLoc> getIntervals(final GenomeAnalysisEngine toolkit) {
|
||||
|
|
@ -79,7 +77,7 @@ public final class IntervalBinding<T extends Feature> {
|
|||
List<GenomeLoc> intervals;
|
||||
|
||||
if ( featureIntervals != null ) {
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals = new ArrayList<>();
|
||||
|
||||
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ public class ParsingEngine {
|
|||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||
new IntervalBindingArgumentTypeDescriptor(),
|
||||
new RodBindingArgumentTypeDescriptor(),
|
||||
new RodBindingCollectionArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor(),
|
||||
new MultiplexArgumentTypeDescriptor()) );
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broad.tribble.Feature;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A RodBinding representing a walker argument that gets bound to a ROD track.
|
||||
* A RodBinding represents a walker argument that gets bound to a ROD track.
|
||||
*
|
||||
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
|
||||
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
|
||||
|
|
@ -77,7 +77,7 @@ public final class RodBinding<T extends Feature> {
|
|||
final private String tribbleType;
|
||||
/** The command line tags associated with this RodBinding */
|
||||
final private Tags tags;
|
||||
/** The Java class expected for this RodBinding. Must correspond to the type emited by Tribble */
|
||||
/** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */
|
||||
final private Class<T> type;
|
||||
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
|
||||
final private boolean bound;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A RodBindingCollection represents a collection of RodBindings.
|
||||
*
|
||||
* The RodBindingCollection<T> is a formal GATK argument that is used to specify a file of RodBindings.
|
||||
*
|
||||
*/
|
||||
public final class RodBindingCollection<T extends Feature> {
|
||||
|
||||
/** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */
|
||||
final private Class<T> type;
|
||||
|
||||
private Collection<RodBinding<T>> rodBindings;
|
||||
|
||||
public RodBindingCollection(final Class<T> type, final Collection<RodBinding<T>> rodBindings) {
|
||||
this.type = type;
|
||||
this.rodBindings = Collections.unmodifiableCollection(rodBindings);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the collection of RodBindings
|
||||
*/
|
||||
final public Collection<RodBinding<T>> getRodBindings() {
|
||||
return rodBindings;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the string name of the tribble type, such as vcf, bed, etc.
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public Class<T> getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("(RodBindingCollection %s)", getRodBindings());
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to help construct a RodBindingCollection of the given Feature type
|
||||
*
|
||||
* @param type the Feature type
|
||||
* @param rodBindings the rod bindings to put into the collection
|
||||
* @return a new RodBindingCollection object
|
||||
*/
|
||||
public static Object createRodBindingCollectionOfType(final Class<? extends Feature> type, final Collection<RodBinding> rodBindings) {
|
||||
try {
|
||||
final Constructor ctor = RodBindingCollection.class.getConstructor(Class.class, Collection.class);
|
||||
return ctor.newInstance(type, rodBindings);
|
||||
} catch (final Exception e) {
|
||||
throw new IllegalStateException("Failed to create a RodBindingCollection for type " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -131,7 +131,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
* a rod priority list is provided.
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public List<RodBinding<VariantContext>> variants;
|
||||
public List<RodBindingCollection<VariantContext>> variantCollections;
|
||||
final private List<RodBinding<VariantContext>> variants = new ArrayList<>();
|
||||
|
||||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
|
@ -230,6 +231,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
|
||||
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
||||
// collect the actual rod bindings into a list for use later
|
||||
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
|
||||
variants.addAll(variantCollection.getRodBindings());
|
||||
}
|
||||
|
||||
private void validateAnnotateUnionArguments() {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broad.tribble.TribbleException;
|
|||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
|
|
@ -1064,7 +1065,6 @@ public class GATKVariantContextUtils {
|
|||
final Set<String> inconsistentAttributes = new HashSet<>();
|
||||
final Set<String> rsIDs = new LinkedHashSet<>(1); // most of the time there's one id
|
||||
|
||||
VariantContext longestVC = first;
|
||||
int depth = 0;
|
||||
final Map<String, List<Comparable>> annotationMap = new LinkedHashMap<>();
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
|
@ -1084,10 +1084,6 @@ public class GATKVariantContextUtils {
|
|||
if ( isSpanningEvent )
|
||||
continue;
|
||||
|
||||
// keep track of the longest location that starts here
|
||||
if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) )
|
||||
longestVC = vc;
|
||||
|
||||
// special case ID (just preserve it)
|
||||
if ( vc.hasID() ) rsIDs.add(vc.getID());
|
||||
|
||||
|
|
@ -1105,15 +1101,15 @@ public class GATKVariantContextUtils {
|
|||
if ( depth > 0 )
|
||||
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
|
||||
|
||||
// remove stale AC and AF based attributes
|
||||
removeStaleAttributesAfterMerge(attributes);
|
||||
|
||||
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID).alleles(alleles)
|
||||
.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd())
|
||||
.chr(loc.getContig()).start(loc.getStart()).computeEndFromAlleles(alleles, loc.getStart())
|
||||
.genotypes(genotypes).unfiltered().attributes(new TreeMap<>(attributes)).log10PError(CommonInfo.NO_LOG10_PERROR); // we will need to regenotype later
|
||||
|
||||
// remove stale AC and AF based attributes
|
||||
removeStaleAttributesAfterMerge(builder);
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
|
|
@ -1147,16 +1143,17 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Remove the stale attributes from the merged VariantContext (builder)
|
||||
* Remove the stale attributes from the merged set
|
||||
*
|
||||
* @param builder the VC builder
|
||||
* @param attributes the attribute map
|
||||
*/
|
||||
private static void removeStaleAttributesAfterMerge(final VariantContextBuilder builder) {
|
||||
builder.rmAttributes(Arrays.asList(VCFConstants.ALLELE_COUNT_KEY,
|
||||
VCFConstants.ALLELE_FREQUENCY_KEY,
|
||||
VCFConstants.ALLELE_NUMBER_KEY,
|
||||
VCFConstants.MLE_ALLELE_COUNT_KEY,
|
||||
VCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
private static void removeStaleAttributesAfterMerge(final Map<String, Object> attributes) {
|
||||
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
|
||||
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
|
||||
attributes.remove(VCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
attributes.remove(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
attributes.remove(VCFConstants.END_KEY);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1544,7 +1541,7 @@ public class GATKVariantContextUtils {
|
|||
final String name = g.getSampleName();
|
||||
if ( !mergedGenotypes.containsSample(name) ) {
|
||||
// we need to modify it even if it already contains all of the alleles because we need to purge the <ALT> PLs out anyways
|
||||
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles);
|
||||
final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart());
|
||||
final int[] PLs = generatePLs(g, indexesOfRelevantAlleles);
|
||||
// note that we set the alleles to null here (as we expect it to be re-genotyped)
|
||||
final Genotype newG = new GenotypeBuilder(g).name(name).alleles(null).PL(PLs).noAD().noGQ().make();
|
||||
|
|
@ -1559,15 +1556,16 @@ public class GATKVariantContextUtils {
|
|||
*
|
||||
* @param remappedAlleles the list of alleles to evaluate
|
||||
* @param targetAlleles the target list of alleles
|
||||
* @param position position to use for error messages
|
||||
* @return non-null array of ints representing indexes
|
||||
*/
|
||||
protected static int[] getIndexesOfRelevantAlleles(final List<Allele> remappedAlleles, final List<Allele> targetAlleles) {
|
||||
protected static int[] getIndexesOfRelevantAlleles(final List<Allele> remappedAlleles, final List<Allele> targetAlleles, final int position) {
|
||||
|
||||
if ( remappedAlleles == null || remappedAlleles.size() == 0 ) throw new IllegalArgumentException("The list of input alleles must not be null or empty");
|
||||
if ( targetAlleles == null || targetAlleles.size() == 0 ) throw new IllegalArgumentException("The list of target alleles must not be null or empty");
|
||||
|
||||
if ( !remappedAlleles.contains(NON_REF_SYMBOLIC_ALLELE) )
|
||||
throw new IllegalArgumentException("The list of input alleles must contain " + NON_REF_SYMBOLIC_ALLELE + " as an allele; please use the Haplotype Caller with gVCF output to generate appropriate records");
|
||||
throw new UserException("The list of input alleles must contain " + NON_REF_SYMBOLIC_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records");
|
||||
final int indexOfGenericAlt = remappedAlleles.indexOf(NON_REF_SYMBOLIC_ALLELE);
|
||||
|
||||
final int[] indexMapping = new int[targetAlleles.size()];
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
public class RodBindingCollectionUnitTest extends BaseTest {
|
||||
|
||||
private ParsingEngine parsingEngine;
|
||||
private Tags mytags;
|
||||
|
||||
private static final String defaultTagString = "VCF";
|
||||
private static final String testVCFFileName = privateTestDir + "empty.vcf";
|
||||
private static final String testListFileName = privateTestDir + "oneVCF.list";
|
||||
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
parsingEngine = new ParsingEngine(null);
|
||||
RodBinding.resetNameCounter();
|
||||
mytags = new Tags();
|
||||
mytags.addPositionalTag(defaultTagString);
|
||||
}
|
||||
|
||||
private class RodBindingCollectionArgProvider {
|
||||
@Argument(fullName="input",doc="input",shortName="V")
|
||||
public RodBindingCollection<VariantContext> input;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStandardVCF() {
|
||||
final String[] commandLine = new String[] {"-V", testVCFFileName};
|
||||
|
||||
parsingEngine.addArgumentSource( RodBindingCollectionArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
parsingEngine.validate();
|
||||
|
||||
final RodBindingCollectionArgProvider argProvider = new RodBindingCollectionArgProvider();
|
||||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.input.getRodBindings().iterator().next().getSource(), testVCFFileName, "Argument is not correctly initialized");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testList() {
|
||||
final String[] commandLine = new String[] {"-V", testListFileName};
|
||||
|
||||
parsingEngine.addArgumentSource(RodBindingCollectionArgProvider.class);
|
||||
parsingEngine.parse( commandLine );
|
||||
parsingEngine.validate();
|
||||
|
||||
final RodBindingCollectionArgProvider argProvider = new RodBindingCollectionArgProvider();
|
||||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.input.getRodBindings().iterator().next().getSource(), "private/testdata/empty.vcf", "Argument is not correctly initialized");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultTagsInFile() throws IOException {
|
||||
|
||||
final File testFile = File.createTempFile("RodBindingCollectionUnitTest.defaultTags", ".list");
|
||||
testFile.deleteOnExit();
|
||||
final FileWriter writer = new FileWriter(testFile);
|
||||
writer.write(testVCFFileName, 0, testVCFFileName.length());
|
||||
writer.close();
|
||||
|
||||
ArgumentTypeDescriptor.getRodBindingsCollection(testFile, parsingEngine, VariantContext.class, "foo", mytags, "input");
|
||||
|
||||
final Collection<RodBinding> bindings = parsingEngine.getRodBindings();
|
||||
Assert.assertNotNull(bindings);
|
||||
Assert.assertEquals(bindings.size(), 1);
|
||||
|
||||
final RodBinding binding = bindings.iterator().next();
|
||||
Assert.assertEquals(parsingEngine.getTags(binding), mytags);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverrideTagsInFile() throws IOException {
|
||||
final File testFile = File.createTempFile("RodBindingCollectionUnitTest.overrideTags", ".list");
|
||||
testFile.deleteOnExit();
|
||||
final FileWriter writer = new FileWriter(testFile);
|
||||
final String textToWrite = "foo " + testVCFFileName;
|
||||
writer.write(textToWrite, 0, textToWrite.length());
|
||||
writer.close();
|
||||
|
||||
ArgumentTypeDescriptor.getRodBindingsCollection(testFile, parsingEngine, VariantContext.class, "foo", mytags, "input");
|
||||
|
||||
final Collection<RodBinding> bindings = parsingEngine.getRodBindings();
|
||||
Assert.assertNotNull(bindings);
|
||||
Assert.assertEquals(bindings.size(), 1);
|
||||
|
||||
final RodBinding binding = bindings.iterator().next();
|
||||
Assert.assertNotEquals(parsingEngine.getTags(binding), mytags);
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
|
|
@ -1463,14 +1464,14 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void testGetIndexesOfRelevantAllelesWithNoALT() {
|
||||
|
||||
final List<Allele> alleles1 = new ArrayList<>(1);
|
||||
alleles1.add(Allele.create("A", true));
|
||||
final List<Allele> alleles2 = new ArrayList<>(1);
|
||||
alleles2.add(Allele.create("A", true));
|
||||
GATKVariantContextUtils.getIndexesOfRelevantAlleles(alleles1, alleles2);
|
||||
GATKVariantContextUtils.getIndexesOfRelevantAlleles(alleles1, alleles2, -1);
|
||||
Assert.fail("We should have thrown an exception because the <ALT> allele was not present");
|
||||
}
|
||||
|
||||
|
|
@ -1502,7 +1503,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
if ( allelesIndex > 0 )
|
||||
myAlleles.add(allAlleles.get(allelesIndex));
|
||||
|
||||
final int[] indexes = GATKVariantContextUtils.getIndexesOfRelevantAlleles(myAlleles, allAlleles);
|
||||
final int[] indexes = GATKVariantContextUtils.getIndexesOfRelevantAlleles(myAlleles, allAlleles, -1);
|
||||
|
||||
Assert.assertEquals(indexes.length, allAlleles.size());
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue