Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
commit 65037b87da

ivy.xml
@@ -97,7 +97,7 @@
         <!-- snpEff annotator for pipelines -->
         <dependency org="net.sf.snpeff" name="snpeff" rev="2.0.5" />

-        <!-- MongoDB for the project to be named later (Bjorn/Nexus/Navel) -->
+        <!-- MongoDB for the GXDB project -->
         <dependency org="org.mongodb" name="mongo-java-driver" rev="2.7.3"/>

         <!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->

@@ -289,7 +289,7 @@ public abstract class ArgumentTypeDescriptor {
         return field.isAnnotationPresent(Hidden.class);
     }

-    public Class makeRawTypeIfNecessary(Type t) {
+    public static Class makeRawTypeIfNecessary(Type t) {
         if ( t == null )
             return null;
         else if ( t instanceof ParameterizedType )

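Note: makeRawTypeIfNecessary becomes static so that the new static parseBinding helper below can call it without an ArgumentTypeDescriptor instance. A minimal standalone sketch of the conversion it performs (a rewrite for illustration, not the GATK source):

    import java.lang.reflect.ParameterizedType;
    import java.lang.reflect.Type;

    final class RawTypes {
        // Collapse a possibly parameterized type such as RodBinding<VariantContext>
        // down to its raw class (RodBinding).
        static Class<?> rawTypeOf(final Type t) {
            if (t == null)
                return null;
            if (t instanceof ParameterizedType)
                return (Class<?>) ((ParameterizedType) t).getRawType();
            return (Class<?>) t;
        }
    }
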
@@ -300,6 +300,115 @@ public abstract class ArgumentTypeDescriptor {
             throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
         }
     }
+
+    /**
+     * The actual argument parsing method.
+     * @param source source
+     * @param type type to check
+     * @param matches matches
+     * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
+     */
+    protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) {
+        ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
+        String value = getArgumentValue(defaultDefinition, matches);
+        @SuppressWarnings("unchecked")
+        Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
+        String name = defaultDefinition.fullName;
+
+        return parseBinding(value, parameterType, type, name, tags, source.field.getName());
+    }
+
+    /**
+     *
+     * @param value The source of the binding
+     * @param parameterType The Tribble Feature parameter type
+     * @param bindingClass The class type for the binding (ex: RodBinding, IntervalBinding, etc.) Must have the correct constructor for creating the binding.
+     * @param bindingName The name of the binding passed to the constructor.
+     * @param tags Tags for the binding used for parsing and passed to the constructor.
+     * @param fieldName The name of the field that was parsed. Used for error reporting.
+     * @return The newly created binding object of type bindingClass.
+     */
+    public static Object parseBinding(String value, Class<? extends Feature> parameterType, Type bindingClass,
+                                      String bindingName, Tags tags, String fieldName) {
+        try {
+            String tribbleType = null;
+            // must have one or two tag values here
+            if ( tags.getPositionalTags().size() > 2 ) {
+                throw new UserException.CommandLineException(
+                        String.format("Unexpected number of positional tags for argument %s : %s. " +
+                                "Rod bindings only support -X:type and -X:name,type argument styles",
+                                value, fieldName));
+            } else if ( tags.getPositionalTags().size() == 2 ) {
+                // -X:name,type style
+                bindingName = tags.getPositionalTags().get(0);
+                tribbleType = tags.getPositionalTags().get(1);
+
+                FeatureManager manager = new FeatureManager();
+                if ( manager.getByName(tribbleType) == null )
+                    throw new UserException.UnknownTribbleType(
+                            tribbleType,
+                            String.format("Unable to find tribble type '%s' provided on the command line. " +
+                                    "Please select a correct type from among the supported types:%n%s",
+                                    tribbleType, manager.userFriendlyListOfAvailableFeatures(parameterType)));
+
+            } else {
+                // case with 0 or 1 positional tags
+                FeatureManager manager = new FeatureManager();
+
+                // -X:type style is a type when we cannot determine the type dynamically
+                String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
+                if ( tag1 != null ) {
+                    if ( manager.getByName(tag1) != null ) // this a type
+                        tribbleType = tag1;
+                    else
+                        bindingName = tag1;
+                }
+
+                if ( tribbleType == null ) {
+                    // try to determine the file type dynamically
+                    File file = new File(value);
+                    if ( file.canRead() && file.isFile() ) {
+                        FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
+                        if ( featureDescriptor != null ) {
+                            tribbleType = featureDescriptor.getName();
+                            logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
+                        }
+                    }
+
+                    if ( tribbleType == null ) {
+                        // IntervalBinding can be created from a normal String
+                        Class rawType = (makeRawTypeIfNecessary(bindingClass));
+                        try {
+                            return rawType.getConstructor(String.class).newInstance(value);
+                        } catch (NoSuchMethodException e) {
+                            /* ignore */
+                        }
+
+                        if ( ! file.exists() ) {
+                            throw new UserException.CouldNotReadInputFile(file, "file does not exist");
+                        } else if ( ! file.canRead() || ! file.isFile() ) {
+                            throw new UserException.CouldNotReadInputFile(file, "file could not be read");
+                        } else {
+                            throw new UserException.CommandLineException(
+                                    String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
+                                            "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
+                                            manager.userFriendlyListOfAvailableFeatures(parameterType)));
+                        }
+                    }
+                }
+            }
+
+            Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
+            return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags);
+        } catch (Exception e) {
+            if ( e instanceof UserException )
+                throw ((UserException)e);
+            else
+                throw new UserException.CommandLineException(
+                        String.format("Failed to parse value %s for argument %s. Message: %s",
+                                value, fieldName, e.getMessage()));
+        }
+    }
 }

 /**

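Note: the two positional-tag styles accepted by parseBinding correspond to command lines like the following (hypothetical argument and file names, shown only to illustrate the styles named in the code above):

    // -V:mycalls,VCF calls.vcf   -X:name,type style: binding named "mycalls", tribble type VCF
    // -V:VCF calls.vcf           -X:type style: explicit type, default binding name
    // -V calls.vcf               no tags: type determined dynamically from the file contents
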
@@ -324,6 +433,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
     public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }

     @Override
+    @SuppressWarnings("unchecked")
     public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
         Class parameterType = JVMUtils.getParameterizedTypeClass(type);
         return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);

@@ -336,118 +446,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {

     @Override
     public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
-        return parse(parsingEngine, source, type, matches, false);
-    }
-
-    /**
-     * The actual argument parsing method.
-     *
-     * IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
-     * that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
-     * this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
-     * is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
-     * usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
-     * side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
-     * continue to input them out of order (whereas Tribble Features are ordered).
-     *
-     * @param parsingEngine parsing engine
-     * @param source source
-     * @param type type to check
-     * @param matches matches
-     * @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
-     * @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
-     */
-    public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
-        ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
-        String value = getArgumentValue( defaultDefinition, matches );
-        Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
-
-        try {
-            String name = defaultDefinition.fullName;
-            String tribbleType = null;
-            Tags tags = getArgumentTags(matches);
-            // must have one or two tag values here
-            if ( tags.getPositionalTags().size() > 2 ) {
-                throw new UserException.CommandLineException(
-                        String.format("Unexpected number of positional tags for argument %s : %s. " +
-                                "Rod bindings only support -X:type and -X:name,type argument styles",
-                                value, source.field.getName()));
-            } if ( tags.getPositionalTags().size() == 2 ) {
-                // -X:name,type style
-                name = tags.getPositionalTags().get(0);
-                tribbleType = tags.getPositionalTags().get(1);
-            } else {
-                // case with 0 or 1 positional tags
-                FeatureManager manager = new FeatureManager();
-
-                // -X:type style is a type when we cannot determine the type dynamically
-                String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
-                if ( tag1 != null ) {
-                    if ( manager.getByName(tag1) != null ) // this a type
-                        tribbleType = tag1;
-                    else
-                        name = tag1;
-                }
-
-                if ( tribbleType == null ) {
-                    // try to determine the file type dynamically
-                    File file = new File(value);
-                    if ( file.canRead() && file.isFile() ) {
-                        FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
-                        if ( featureDescriptor != null ) {
-                            tribbleType = featureDescriptor.getName();
-                            logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
-                        }
-                    }
-
-                    if ( tribbleType == null ) {
-                        // IntervalBindings allow streaming conversion of Strings
-                        if ( createIntervalBinding ) {
-                            return new IntervalBinding(value);
-                        }
-
-                        if ( ! file.exists() ) {
-                            throw new UserException.CouldNotReadInputFile(file, "file does not exist");
-                        } else if ( ! file.canRead() || ! file.isFile() ) {
-                            throw new UserException.CouldNotReadInputFile(file, "file could not be read");
-                        } else {
-                            throw new UserException.CommandLineException(
-                                    String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
-                                            "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
-                                            manager.userFriendlyListOfAvailableFeatures(parameterType)));
-                        }
-                    }
-                }
-            }
-
-            Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
-            Object result;
-            if ( createIntervalBinding ) {
-                result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
-            } else {
-                RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
-                parsingEngine.addTags(rbind, tags);
-                parsingEngine.addRodBinding(rbind);
-                result = rbind;
-            }
-            return result;
-        } catch (InvocationTargetException e) {
-            throw new UserException.CommandLineException(
-                    String.format("Failed to parse value %s for argument %s.",
-                            value, source.field.getName()));
-        } catch (Exception e) {
-            if ( e instanceof UserException )
-                throw ((UserException)e);
-            else
-                throw new UserException.CommandLineException(
-                        String.format("Failed to parse value %s for argument %s. Message: %s",
-                                value, source.field.getName(), e.getMessage()));
-        }
+        Tags tags = getArgumentTags(matches);
+        RodBinding rbind = (RodBinding)parseBinding(source, type, matches, tags);
+        parsingEngine.addTags(rbind, tags);
+        parsingEngine.addRodBinding(rbind);
+        return rbind;
     }
 }

 /**
- * Parser for RodBinding objects
+ * Parser for IntervalBinding objects
  */
 class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
     /**

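Note: the createIntervalBinding flag and the duplicated parse body are gone; both descriptors now funnel into the shared ArgumentTypeDescriptor.parseBinding. A rough sketch of the resulting call flow, using only names from the hunks above:

    // RodBindingArgumentTypeDescriptor.parse: registers the binding with the engine
    //     parseBinding(source, type, matches, tags)  -> RodBinding
    //     parsingEngine.addTags(rbind, tags); parsingEngine.addRodBinding(rbind);
    // IntervalBindingArgumentTypeDescriptor.parse (next hunk): returns the binding
    //     without registering it, so walkers never see intervals among the usual RodBindings.
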
@@ -475,7 +483,7 @@ class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
      */
     @Override
     public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
-        return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
+        return parseBinding(source, type, matches, getArgumentTags(matches));
     }
 }

@@ -783,7 +791,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
         }

         Class<? extends Multiplexer> multiplexerType = dependentArgument.field.getAnnotation(Multiplex.class).value();
-        Constructor<? extends Multiplexer> multiplexerConstructor = null;
+        Constructor<? extends Multiplexer> multiplexerConstructor;
         try {
             multiplexerConstructor = multiplexerType.getConstructor(sourceTypes);
             multiplexerConstructor.setAccessible(true);

@@ -792,7 +800,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
             throw new ReviewedStingException(String.format("Unable to find constructor for class %s with parameters %s",multiplexerType.getName(),Arrays.deepToString(sourceFields)),ex);
         }

-        Multiplexer multiplexer = null;
+        Multiplexer multiplexer;
         try {
             multiplexer = multiplexerConstructor.newInstance(sourceValues);
         }

@@ -78,24 +78,7 @@ public abstract class ParsingMethod {

         String argument = matcher.group(1).trim();

-        Tags tags = new Tags();
-        if(matcher.group(2) != null) {
-            for(String tag: Utils.split(matcher.group(2),",")) {
-                // Check for presence of an '=' sign, indicating a key-value pair in the tag line.
-                int equalDelimiterPos = tag.indexOf('=');
-                if(equalDelimiterPos >= 0) {
-                    // Sanity check; ensure that there aren't multiple '=' in this key-value pair.
-                    if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
-                        throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
-                                "key-value tags are of the form <key>=<value>, and neither key " +
-                                "nor value contain the '=' character", tag, argument));
-                    tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
-                }
-                else
-                    tags.addPositionalTag(tag);
-
-            }
-        }
+        Tags tags = parseTags(argument, matcher.group(2));

         // Find the most appropriate argument definition for the given argument.
         ArgumentDefinition argumentDefinition = definitions.findArgumentDefinition( argument, definitionMatcher );

@@ -105,6 +88,28 @@ public abstract class ParsingMethod {
         return new ArgumentMatch(argument,argumentDefinition,position,tags);
     }

+    public static Tags parseTags(String argument, String tagString) {
+        Tags tags = new Tags();
+        if (tagString != null) {
+            for(String tag: Utils.split(tagString, ",")) {
+                // Check for presence of an '=' sign, indicating a key-value pair in the tag line.
+                int equalDelimiterPos = tag.indexOf('=');
+                if(equalDelimiterPos >= 0) {
+                    // Sanity check; ensure that there aren't multiple '=' in this key-value pair.
+                    if(tag.indexOf('=',equalDelimiterPos+1) >= 0)
+                        throw new ArgumentException(String.format("Tag %s passed to argument %s is malformed. Please ensure that " +
+                                "key-value tags are of the form <key>=<value>, and neither key " +
+                                "nor value contain the '=' character", tag, argument));
+                    tags.addKeyValueTag(tag.substring(0,equalDelimiterPos),tag.substring(equalDelimiterPos+1));
+                }
+                else
+                    tags.addPositionalTag(tag);
+
+            }
+        }
+        return tags;
+    }
+
     /**
      * A command-line argument always starts with an alphabetical character or underscore followed by any word character.
      */

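Note: parseTags splits the raw tag string on commas and sorts each piece into positional or key=value tags; a hypothetical call illustrating the behavior of the extracted method above:

    // Tag string as it would appear after an argument like "-V:mycalls,VCF,db=true file.vcf"
    Tags tags = ParsingMethod.parseTags("V", "mycalls,VCF,db=true");
    // tags.getPositionalTags() -> ["mycalls", "VCF"]; key-value tag: db=true
    // A malformed tag such as "a=b=c" throws ArgumentException.
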
@@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMSequenceDictionary;
 import org.apache.log4j.Logger;
-import org.broad.tribble.Feature;
 import org.broad.tribble.readers.PositionalBufferedStream;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;

@@ -54,9 +53,9 @@ import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;

@@ -582,7 +581,6 @@ public class GenomeAnalysisEngine {
      * Setup the intervals to be processed
      */
     protected void initializeIntervals() {
-
         // return if no interval arguments at all
         if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
             return;

@@ -590,17 +588,22 @@ public class GenomeAnalysisEngine {
         // Note that the use of '-L all' is no longer supported.

-        // if include argument isn't given, create new set of all possible intervals
-        GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
-                GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
-                loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding));
+        Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
+                this.referenceDataSource,
+                argCollection.intervals,
+                argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
+                argCollection.excludeIntervals);
+
+        GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
+        GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();

         // if no exclude arguments, can return parseIntervalArguments directly
-        if ( argCollection.excludeIntervals == null )
+        if ( excludeSortedSet == null )
             intervals = includeSortedSet;

         // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
         else {
-            GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
             intervals = includeSortedSet.subtractRegions(excludeSortedSet);

             // logging messages only printed when exclude (-XL) arguments are given

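Note: interval include/exclude handling now lives in a single call; a rough usage sketch of the pair returned by IntervalUtils.parseIntervalBindingsPair, with types taken from the hunk above:

    // first:  -L arguments merged per the interval set rule and padded
    // second: -XL arguments, or null when none were given
    GenomeLocSortedSet includes = includeExcludePair.getFirst();
    GenomeLocSortedSet excludes = includeExcludePair.getSecond();
    GenomeLocSortedSet result = (excludes == null) ? includes : includes.subtractRegions(excludes);
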
@@ -613,43 +616,6 @@ public class GenomeAnalysisEngine {
         }
     }

-    /**
-     * Loads the intervals relevant to the current execution
-     * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
-     * @param rule interval merging rule
-     * @return A sorted, merged list of all intervals specified in this arg list.
-     */
-    protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule ) {
-        return loadIntervals(argList, rule, 0);
-    }
-
-    /**
-     * Loads the intervals relevant to the current execution
-     * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
-     * @param rule interval merging rule
-     * @param padding how much to pad the intervals
-     * @return A sorted, merged list of all intervals specified in this arg list.
-     */
-    protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule, final int padding ) {
-
-        List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
-        for ( IntervalBinding intervalBinding : argList ) {
-            List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getGenomeLocParser());
-
-            if ( intervals.isEmpty() ) {
-                logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
-            }
-
-            if ( padding > 0 ) {
-                intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding);
-            }
-
-            allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
-        }
-
-        return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
-    }
-
     /**
      * Add additional, externally managed IO streams for inputs.
      *

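Note: the removed loadIntervals applied padding via IntervalUtils.getIntervalsWithFlanks, which extends each interval on both sides; a worked example of the arithmetic with hypothetical coordinates:

    // padding = 50: an interval chr1:1100-1200 becomes chr1:1050-1250
    // (the equivalent padding is now applied inside IntervalUtils.parseIntervalBindingsPair)
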
@@ -830,7 +796,8 @@ public class GenomeAnalysisEngine {
                 throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
             }
         }
-        RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,header,validationExclusionType);
+
+        RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);

         List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
         for (RMDTriplet fileDescriptor : referenceMetaDataFiles)

@@ -854,6 +821,15 @@ public class GenomeAnalysisEngine {
         return readsDataSource.getHeader();
     }

+    public boolean lenientVCFProcessing() {
+        return lenientVCFProcessing(argCollection.unsafe);
+    }
+
+    public static boolean lenientVCFProcessing(final ValidationExclusion.TYPE val) {
+        return val == ValidationExclusion.TYPE.ALL
+                || val == ValidationExclusion.TYPE.LENIENT_VCF_PROCESSING;
+    }
+
     /**
      * Returns the unmerged SAM file header for an individual reader.
      * @param reader The reader.

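Note: lenientVCFProcessing is the single switch consulted by the writer stub and the codec factory in the hunks below; a hedged usage sketch (the -U flag mapping is an assumption based on the ValidationExclusion enum in this diff):

    // -U ALL or -U LENIENT_VCF_PROCESSING on the command line both enable leniency
    if ( engine.lenientVCFProcessing() )
        options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
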
@@ -347,9 +347,6 @@ public class GATKArgumentCollection {
     public boolean USE_SLOW_GENOTYPES = false;
     // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed

-    @Argument(fullName="allowMissingVCFHeaders",shortName = "allowMissingVCFHeaders",doc="If provided, the GATK will write out VCF files that contain INFO, FILTER, and FORMAT fields not found in the VCF header",required=false)
-    public boolean allowMissingVCFHeaders = false;
-
     /**
      * The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
      * and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other

@@ -40,6 +40,7 @@ public class ValidationExclusion {
         ALLOW_UNSET_BAM_SORT_ORDER,     // assume that the bam is sorted, even if the SO (sort-order) flag is not set
         NO_READ_ORDER_VERIFICATION,     // do not validate that the reads are in order as we take them from the bam file
         ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompatibilities
+        LENIENT_VCF_PROCESSING,         // allow non-standard values for standard VCF header lines. Don't worry about size differences between header and values, etc.
         @EnumerationArgumentDefault     // set the ALL value to the default value, so if they specify just -U, we get the ALL
         ALL                             // do not check for all of the above conditions, DEFAULT
     }

@@ -107,8 +107,10 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
         // TODO -- remove me when argument generateShadowBCF is removed
         if ( stub.alsoWriteBCFForTest() && ! VariantContextWriterFactory.isBCFOutput(file, options)) {
             final File bcfFile = BCF2Utils.shadowBCF(file);
-            VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options);
-            writer = new TestWriter(writer, bcfWriter);
+            if ( bcfFile != null ) {
+                VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options);
+                writer = new TestWriter(writer, bcfWriter);
+            }
         }

         return writer;

@@ -183,7 +183,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
         List<Options> options = new ArrayList<Options>();

         if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
-        if ( engine.getArguments().allowMissingVCFHeaders ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
+        if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
         if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);

         return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);

@@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

@@ -85,16 +86,18 @@ public class FeatureManager {
     private final PluginManager<FeatureCodec> pluginManager;
     private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
     private final VCFHeader headerForRepairs;
+    private final boolean lenientVCFProcessing;

     /**
      * Construct a FeatureManager without a master VCF header
      */
     public FeatureManager() {
-        this(null);
+        this(null, false);
     }

-    public FeatureManager(final VCFHeader headerForRepairs) {
+    public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
         this.headerForRepairs = headerForRepairs;
+        this.lenientVCFProcessing = lenientVCFProcessing;
         pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");

         for (final String rawName: pluginManager.getPluginsByName().keySet()) {

@@ -252,8 +255,11 @@ public class FeatureManager {
             ((NameAwareCodec)codex).setName(name);
         if ( codex instanceof ReferenceDependentFeatureCodec )
             ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
-        if ( codex instanceof VCFCodec)
+        if ( codex instanceof VCFCodec )
             ((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
+        if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
+            ((AbstractVCFCodec)codex).disableOnTheFlyModifications();

         return codex;
     }
 }

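Note: the lenient flag travels from the -U argument down to each VCF codec the manager hands out; a condensed sketch of the propagation shown across the hunks above and below:

    // GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)  // static check of -U
    //   -> new FeatureManager(headerForRepairs, /* lenientVCFProcessing = */ true)
    //     -> ((AbstractVCFCodec) codec).disableOnTheFlyModifications();   // per-codec effect
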
@@ -34,6 +34,7 @@ import org.broad.tribble.index.Index;
 import org.broad.tribble.index.IndexFactory;
 import org.broad.tribble.util.LittleEndianOutputStream;
 import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;

@@ -98,7 +99,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
         this.dict = dict;
         this.validationExclusionType = validationExclusionType;
         this.genomeLocParser = genomeLocParser;
-        this.featureManager = new FeatureManager(headerForRepairs);
+        this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
     }

     /**

@@ -71,11 +71,13 @@ public class BQSRGatherer extends Gatherer {
         if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
             File recal_out = new File(output.getName() + ".original");
             RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
-            RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
+            // TODO -- fix me
+            //RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
         }
         else if (!RAC.NO_PLOTS) {
             File recal_out = new File(output.getName() + ".recal");
-            RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
+            // TODO -- fix me
+            //RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
         }

         generalReport.output(outputFile);

@ -1,329 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR
|
||||
*
|
||||
* It also handles the event type "covariate" which is not exactly a covariate, but is added as a key to the hashmap. The Key Manager will
|
||||
* add the event type as a bitset to the end of the covariate bitset key. This way, it won't get int the way of masking the information
|
||||
* out of the key for the actual covariates, and having the covariates handle it. The key manager handles the event type.
|
||||
*
|
||||
* The keys represented by this key manager will always have the same order:
|
||||
*
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate1, OptionalCovariateID, EventType
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate2, OptionalCovariateID, EventType
|
||||
* ...
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariateN, OptionalCovariateID, EventType
|
||||
*
|
||||
*
|
||||
* Note that Optional Covariates are optional, and the Key Manager should operate without them if necessary.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/6/12
|
||||
*/
|
||||
public class BQSRKeyManager {
|
||||
|
||||
private final Covariate[] requiredCovariates;
|
||||
private final Covariate[] optionalCovariates;
|
||||
private final RequiredCovariateInfo[] requiredCovariatesInfo;
|
||||
private final OptionalCovariateInfo[] optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
|
||||
private final int optionalCovariateOffset;
|
||||
private final int optionalCovariateIDOffset;
|
||||
|
||||
private final long optionalCovariateMask; // Standard mask for optional covariates key
|
||||
private final long optionalCovariateIDMask; // Standard mask for optional covariates order key
|
||||
private final long eventIDMask; // Standard mask for event ID
|
||||
|
||||
/**
|
||||
* Initializes the KeyManager with the total number of covariates to use
|
||||
*
|
||||
* @param requiredCovariates the ordered list of required covariates
|
||||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new Covariate[requiredCovariates.size()];
|
||||
this.optionalCovariates = new Covariate[optionalCovariates.size()];
|
||||
requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
|
||||
optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
|
||||
final Covariate required = requiredCovariates.get(i);
|
||||
final int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates[i] = required;
|
||||
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length);
|
||||
eventIDMask = genericMask(nRequiredBits, bitsInEventType);
|
||||
|
||||
short id = 0;
|
||||
int nOptionalBits = 0;
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
final Covariate optional = optionalCovariates.get(i);
|
||||
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
this.optionalCovariates[i] = optional;
|
||||
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
|
||||
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
optionalCovariateOffset = nRequiredBits + bitsInEventType;
|
||||
optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits;
|
||||
final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
|
||||
final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key
|
||||
if ( totalNumberOfBits > 64 )
|
||||
throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a long");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one key given the optional covariate (or none if it is null)
|
||||
*
|
||||
* Keys include all required covariates, the standard covariate and the event type.
|
||||
*
|
||||
* @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested)
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation (non-negative) or -1 for a bad key
|
||||
*/
|
||||
public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) {
|
||||
|
||||
int keyIndex = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (allKeys[keyIndex++] << infoRequired.offset);
|
||||
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) {
|
||||
final long covariateKey = allKeys[keyIndex + optionalCovariateIndex];
|
||||
if (covariateKey < 0) // do not add "nulls" to the final set of keys
|
||||
return -1;
|
||||
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one key for the covariates represented in Object[] key
|
||||
*
|
||||
* The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file)
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many.
|
||||
*
|
||||
* Example key:
|
||||
* RG, QUAL, CYCLE, CYCLE_ID, EventType
|
||||
*
|
||||
* @param key list of objects produced by the required covariates followed by one or zero optional covariates.
|
||||
* @return a key representing these objects.
|
||||
*/
|
||||
public long longFromKey(Object[] key) {
|
||||
int requiredCovariate = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset);
|
||||
|
||||
final int eventIndex = key.length - 1; // the event type is always the last key
|
||||
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
|
||||
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
|
||||
|
||||
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Covariate id can be either the covariate name (String) or the actual id (short). This method
|
||||
* finds it's type and converts accordingly to the short notation.
|
||||
*
|
||||
* @param id the string or short representation of the optional covariate id
|
||||
* @return the short representation of the optional covariate id.
|
||||
*/
|
||||
private short parseCovariateID(final Object id) {
|
||||
return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a key set of objects from a combined master key.
|
||||
*
|
||||
* Masks out each covariate independently and decodes their values (Object) into a keyset
|
||||
*
|
||||
* @param master the master representation of the keys
|
||||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(final long master) {
|
||||
final List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
final long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key
|
||||
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
|
||||
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
|
||||
Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
|
||||
objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set
|
||||
|
||||
return objectKeys;
|
||||
}
|
||||
|
||||
public Covariate[] getRequiredCovariates() {
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
public Covariate[] getOptionalCovariates() {
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
public int getNumRequiredCovariates() {
|
||||
return requiredCovariates.length;
|
||||
}
|
||||
|
||||
public int getNumOptionalCovariates() {
|
||||
return optionalCovariates.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
|
||||
*
|
||||
* @param offset the offset into the master key
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values
|
||||
* @return the mask relevant to the covariate
|
||||
*/
|
||||
private long genericMask(final int offset, final int nBits) {
|
||||
long mask = 0L;
|
||||
for ( int i = 0; i < nBits; i++ )
|
||||
mask |= 1L << (offset+i);
|
||||
return mask;
|
||||
}
|
||||
|
||||
private long extractKeyFromMaster(final long master, final long mask, final int offset) {
|
||||
long key = master & mask;
|
||||
return key >> offset;
|
||||
}
|
||||
|
||||
// cache the key representing an event since it's otherwise created a massive amount of times
|
||||
private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache[eventType.index] = (long)eventType.index;
|
||||
}
|
||||
|
||||
private long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache[eventType.index];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof BQSRKeyManager))
|
||||
return false;
|
||||
|
||||
BQSRKeyManager other = (BQSRKeyManager) o;
|
||||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
|
||||
optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < requiredCovariates.length; i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates[i];
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates[i];
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < optionalCovariates.length; i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates[i];
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates[i];
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate information for each Covariate
|
||||
*/
|
||||
private static class RequiredCovariateInfo {
|
||||
public final int nBits; // number of bits for this key
|
||||
public final int offset; // the offset into the master key
|
||||
public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||
|
||||
RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) {
|
||||
this.nBits = nBits;
|
||||
this.offset = offset;
|
||||
this.mask = mask;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
private static class OptionalCovariateInfo {
|
||||
public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works)
|
||||
public final Covariate covariate;
|
||||
|
||||
OptionalCovariateInfo(final long covariateID, final Covariate covariate) {
|
||||
this.covariateID = covariateID;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
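Note: the deleted key manager packed every covariate into a single long; a small worked example of the masking scheme its javadoc describes, using genericMask and extractKeyFromMaster from the removed code (the bit widths are hypothetical):

    // Suppose two required covariates use 8 and 6 bits. Then:
    //   covariate 1 occupies bits 0-7:  mask = genericMask(0, 8) = 0x00FF
    //   covariate 2 occupies bits 8-13: mask = genericMask(8, 6) = 0x3F00
    //   the event type follows at offset 14, then the optional covariate and its ID
    long key = master & mask;   // isolate one covariate's bits
    long value = key >> offset; // shift back down to recover its value
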
@@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

+import java.util.ArrayList;
+
 /**
  * Created by IntelliJ IDEA.
  * User: rpoplin

@@ -43,6 +45,19 @@ public class ContextCovariate implements StandardCovariate {
     private int mismatchesContextSize;
     private int indelsContextSize;

+    private int mismatchesKeyMask;
+    private int indelsKeyMask;
+
+    private static final int LENGTH_BITS = 4;
+    private static final int LENGTH_MASK = 15;
+
+    // temporary lists to use for creating context covariate keys
+    private final ArrayList<Integer> mismatchKeys = new ArrayList<Integer>(200);
+    private final ArrayList<Integer> indelKeys = new ArrayList<Integer>(200);
+
+    // the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are
+    // not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base.
+    static final private int MAX_DNA_CONTEXT = 13;
     private byte LOW_QUAL_TAIL;

     // Initialize any member variables using the command-line arguments passed to the walkers

@@ -59,11 +74,15 @@ public class ContextCovariate implements StandardCovariate {

         if (mismatchesContextSize <= 0 || indelsContextSize <= 0)
             throw new UserException(String.format("Context size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Indels: %d", mismatchesContextSize, indelsContextSize));
+
+        mismatchesKeyMask = createMask(mismatchesContextSize);
+        indelsKeyMask = createMask(indelsContextSize);
     }

     @Override
     public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {

+        // TODO -- wrong: fix me
         final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context

         final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();

@@ -71,10 +90,15 @@ public class ContextCovariate implements StandardCovariate {
         if (negativeStrand)
             bases = BaseUtils.simpleReverseComplement(bases);

-        final int readLength = clippedRead.getReadLength();
+        mismatchKeys.clear();
+        indelKeys.clear();
+        contextWith(bases, mismatchesContextSize, mismatchKeys, mismatchesKeyMask);
+        contextWith(bases, indelsContextSize, indelKeys, indelsKeyMask);
+
+        final int readLength = bases.length;
         for (int i = 0; i < readLength; i++) {
-            final long indelKey = contextWith(bases, i, indelsContextSize);
-            values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
+            final int indelKey = indelKeys.get(i);
+            values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
         }
     }

@@ -85,7 +109,7 @@ public class ContextCovariate implements StandardCovariate {
     }

     @Override
-    public String formatKey(final long key) {
+    public String formatKey(final int key) {
         if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
             return null;

@ -93,147 +117,126 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public long longFromKey(Object key) {
|
||||
return keyFromContext((String) key);
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyFromContext((String) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Integer.bitCount(Integer.MAX_VALUE);
|
||||
private static int createMask(final int contextSize) {
|
||||
int mask = 0;
|
||||
// create 2*contextSize worth of bits
|
||||
for (int i = 0; i < contextSize; i++)
|
||||
mask = (mask << 2) | 3;
|
||||
// shift 4 bits to mask out the bits used to encode the length
|
||||
return mask << LENGTH_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
|
||||
*
|
||||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @return the key representing the context
|
||||
* @param keys list to store the keys
|
||||
* @param mask mask for pulling out just the context bits
|
||||
*/
|
||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
final int start = offset - contextSize + 1;
|
||||
final long result;
|
||||
if (start >= 0)
|
||||
result = keyFromContext(bases, start, offset + 1);
|
||||
else
|
||||
result = -1L;
|
||||
return result;
|
||||
private static void contextWith(final byte[] bases, final int contextSize, final ArrayList<Integer> keys, final int mask) {
|
||||
|
||||
// the first contextSize-1 bases will not have enough previous context
|
||||
for (int i = 1; i < contextSize && i <= bases.length; i++)
|
||||
keys.add(-1);
|
||||
|
||||
if (bases.length < contextSize)
|
||||
return;
|
||||
|
||||
final int newBaseOffset = 2 * (contextSize - 1) + LENGTH_BITS;
|
||||
|
||||
// get (and add) the key for the context starting at the first base
|
||||
int currentKey = keyFromContext(bases, 0, contextSize);
|
||||
keys.add(currentKey);
|
||||
|
||||
// if the first key was -1 then there was an N in the context; figure out how many more consecutive contexts it affects
|
||||
int currentNPenalty = 0;
|
||||
if (currentKey == -1) {
|
||||
currentKey = 0;
|
||||
currentNPenalty = contextSize - 1;
|
||||
int offset = newBaseOffset;
|
||||
while (bases[currentNPenalty] != 'N') {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentNPenalty]);
|
||||
currentKey |= (baseIndex << offset);
|
||||
offset -= 2;
|
||||
currentNPenalty--;
|
||||
}
|
||||
}
|
||||
|
||||
final int readLength = bases.length;
|
||||
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
|
||||
if (baseIndex == -1) { // ignore non-ACGT bases
|
||||
currentNPenalty = contextSize;
|
||||
currentKey = 0; // reset the key
|
||||
} else {
|
||||
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
|
||||
currentKey = (currentKey >> 2) & mask;
|
||||
currentKey |= (baseIndex << newBaseOffset);
|
||||
currentKey |= contextSize;
|
||||
}
|
||||
|
||||
if (currentNPenalty == 0) {
|
||||
keys.add(currentKey);
|
||||
} else {
|
||||
currentNPenalty--;
|
||||
keys.add(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static long keyFromContext(final String dna) {
|
||||
public static int keyFromContext(final String dna) {
|
||||
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a long representation of a given dna string.
|
||||
* Creates a int representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @param dna the dna sequence
|
||||
* @param start the start position in the byte array (inclusive)
|
||||
* @param end the end position in the array (exclusive)
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
public static long keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
private static int keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
|
||||
int key = end - start;
|
||||
int bitOffset = 4;
|
||||
for (int i = start; i < end; i++) {
|
||||
baseTen = (baseTen << 2); // multiply by 4
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1L;
|
||||
baseTen += (long)baseIndex;
|
||||
return -1;
|
||||
key |= (baseIndex << bitOffset);
|
||||
bitOffset += 2;
|
||||
}
|
||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}

    static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
    static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
    static {
        for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++)
            computeCombinationsFor(i);
    }

    /**
     * The total number of contexts of every length from 1 up to the given length.
     *
     * Memoized implementation of sum(4^i), where i = [1, length]
     *
     * @param length the length of the DNA context
     */
    private static void computeCombinationsFor(final int length) {
        long combinations = 0L;
        for (int i = 1; i <= length; i++)
            combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
        combinationsPerLength[length] = combinations;
        return key;
    }
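The removed long-based scheme can be spot-checked against the javadoc table above: a key is the string's base-4 value plus the count of all shorter strings. A quick standalone check (the helper mirrors the removed arithmetic):

// Sketch: reproduces the removed enumeration to verify the javadoc table:
// A=0, AA=4, CA=8, TTGGT=1343.
public class OldContextKeySketch {
    private static long combinationsUpTo(final int length) { // sum of 4^i for i in [1, length]
        long combinations = 0L;
        for (int i = 1; i <= length; i++)
            combinations += (1L << (2 * i));
        return combinations;
    }

    public static long keyFromContext(final String dna) {
        long baseTen = 0L;
        for (int i = 0; i < dna.length(); i++)
            baseTen = baseTen * 4 + "ACGT".indexOf(dna.charAt(i)); // base-4 value of the string
        return baseTen + combinationsUpTo(dna.length() - 1); // plus all shorter strings
    }

    public static void main(String[] args) {
        System.out.println(keyFromContext("A"));     // 0
        System.out.println(keyFromContext("AA"));    // 4
        System.out.println(keyFromContext("CA"));    // 8
        System.out.println(keyFromContext("TTGGT")); // 1343
    }
}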

    /**
     * Converts a key into the dna string representation.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases.
     *
     * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
     * base_10 representation of the sequence. This is important for us to know how to bring the number
     * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
     * as 0's and leading 0's are omitted).
     *
     * quasi-canonical because A is represented by a 0, therefore,
     * instead of : 0, 1, 2, 3, 10, 11, 12, ...
     * we have    : 0, 1, 2, 3, 00, 01, 02, ...
     *
     * but we can correctly decode it because we know the final length.
     *
     * @param key the key representing the dna sequence
     * @return the dna sequence represented by the key
     */
    public static String contextFromKey(long key) {
    public static String contextFromKey(final int key) {
        if (key < 0)
            throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");

        final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
        key -= combinationsPerLength[length - 1]; // subtract the number of combinations of the preceding context from the number to get to the quasi-canonical representation
        final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
        int mask = 48; // use the mask to pull out bases
        int offset = 4;

        StringBuilder dna = new StringBuilder();
        while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
            final byte base = (byte) (key & 3); // equivalent to (key % 4)
            dna.append((char)BaseUtils.baseIndexToSimpleBase(base));
            key = key >> 2; // divide by 4
        for (int i = 0; i < length; i++) {
            final int baseIndex = (key & mask) >> offset;
            dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
            mask = mask << 2; // move the mask over to the next 2 bits
            offset += 2;
        }
        for (int j = dna.length(); j < length; j++)
            dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)

        return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
    }
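Decoding under the removed scheme runs the enumeration backwards: find the length, subtract the count of shorter strings, then emit base-4 digits and reverse, padding with leading A's. A compact sketch that reproduces the table values:

// Sketch of the removed decode path: find the length, strip the offset for
// shorter strings, read base-4 digits, pad leading A's, and reverse.
public class OldContextDecodeSketch {
    public static String contextFromKey(long key) {
        int length = 1; // smallest L with sum(4^i, i=1..L) > key
        long combinations = 4;
        while (combinations <= key) {
            length++;
            combinations += (1L << (2 * length));
        }
        key -= (combinations - (1L << (2 * length))); // subtract the count of shorter strings

        final StringBuilder dna = new StringBuilder();
        while (key > 0) { // base-10 to base-4, least significant digit first
            dna.append("ACGT".charAt((int) (key & 3)));
            key >>= 2;
        }
        while (dna.length() < length)
            dna.append('A'); // A encodes as 0, so leading zeros vanished
        return dna.reverse().toString();
    }

    public static void main(String[] args) {
        System.out.println(contextFromKey(8));    // CA
        System.out.println(contextFromKey(1343)); // TTGGT
    }
}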

    /**
     * Calculates the length of the DNA context for a given base 10 number.
     *
     * It is important to know the length given the base 10 number to calculate the number of combinations
     * and to disambiguate the "quasi-canonical" state.
     *
     * This method also calculates the number of combinations as a by-product, but since it memoizes the
     * results, a subsequent call to combinationsFor(length) is O(1).
     *
     * @param number the base 10 representation of the key
     * @return the length of the DNA context represented by this number
     */
    private static int contextLengthFor(final long number) {
        int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
        long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it).
        while (combinations <= number) { // find the length of the dna string (length)
            length++;
            combinations = combinationsPerLength[length]; // calculate the next context
        }
        return length;
        return dna.toString();
    }
}

@@ -67,7 +67,7 @@ public interface Covariate {
     * @param key the long representation of the key
     * @return a string representation of the key
     */
    public String formatKey(final long key);
    public String formatKey(final int key);

    /**
     * Converts an Object key into a long key using only the lowest numberOfBits() bits

@@ -75,18 +75,10 @@ public interface Covariate {
     * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
     * the getValues method already returns all values in long format.
     *
     * @param key the object corresponding to the covariate
     * @param value the object corresponding to the covariate
     * @return a long representation of the object
     */
    public long longFromKey(final Object key);

    /**
     * Each covariate should determine how many bits are necessary to encode its data
     *
     * @return The number of bits used to represent the values of this covariate.
     */
    public int numberOfBits();

    public int keyFromValue(final Object value);
}

interface RequiredCovariate extends Covariate {}
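For orientation, the reworked contract reduces to the three members visible in these hunks; the sketch below restates them on a local mini-interface (the real Covariate interface has additional methods, such as recordValues, that are outside this diff):

// Sketch: the int-key contract after this change, on a stand-in interface.
interface MiniCovariate {
    String formatKey(final int key);      // render a key for reports
    int keyFromValue(final Object value); // map a value to its int key
    int numberOfBits();                   // how many bits a key may occupy when packed
}

// A trivial instance: keys are the values themselves.
class IdentityCovariate implements MiniCovariate {
    public String formatKey(final int key) { return String.format("%d", key); }
    public int keyFromValue(final Object value) {
        return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
    }
    public int numberOfBits() { return 7; } // keys in [0, 127]
}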

@@ -79,7 +79,7 @@ public class CycleCovariate implements StandardCovariate {
        final int CUSHION = 4;
        final int MAX_CYCLE = readLength - CUSHION - 1;
        for (int i = 0; i < readLength; i++) {
            final long key = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
            final int key = (i<CUSHION || i>MAX_CYCLE) ? -1 : keyFromCycle(cycle);
            values.addCovariate(key, key, key, i);
            cycle += increment;
        }

@@ -106,22 +106,22 @@ public class CycleCovariate implements StandardCovariate {
            int iii = 0;
            while (iii < readLength) {
                while (iii < readLength && bases[iii] == (byte) 'T') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii++;
                }
                while (iii < readLength && bases[iii] == (byte) 'A') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii++;
                }
                while (iii < readLength && bases[iii] == (byte) 'C') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii++;
                }
                while (iii < readLength && bases[iii] == (byte) 'G') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii++;
                }

@@ -132,7 +132,7 @@ public class CycleCovariate implements StandardCovariate {
                    cycle++;
                }
                if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii++;
                }

@@ -143,22 +143,22 @@ public class CycleCovariate implements StandardCovariate {
            int iii = readLength - 1;
            while (iii >= 0) {
                while (iii >= 0 && bases[iii] == (byte) 'T') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii--;
                }
                while (iii >= 0 && bases[iii] == (byte) 'A') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii--;
                }
                while (iii >= 0 && bases[iii] == (byte) 'C') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii--;
                }
                while (iii >= 0 && bases[iii] == (byte) 'G') {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii--;
                }

@@ -169,7 +169,7 @@ public class CycleCovariate implements StandardCovariate {
                    cycle++;
                }
                if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
                    final long key = keyFromCycle(cycle);
                    final int key = keyFromCycle(cycle);
                    values.addCovariate(key, key, key, iii);
                    iii--;
                }

@@ -190,26 +190,21 @@ public class CycleCovariate implements StandardCovariate {
    }

    @Override
    public String formatKey(final long key) {
        long cycle = key >> 1; // shift so we can remove the "sign" bit
    public String formatKey(final int key) {
        int cycle = key >> 1; // shift so we can remove the "sign" bit
        if ( (key & 1) != 0 ) // is the last bit set?
            cycle *= -1; // then the cycle is negative
        return String.format("%d", cycle);
    }

    @Override
    public long longFromKey(final Object key) {
        return (key instanceof String) ? keyFromCycle(Integer.parseInt((String) key)) : keyFromCycle((Integer) key);
    public int keyFromValue(final Object value) {
        return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
    }

    @Override
    public int numberOfBits() {
        return Integer.bitCount(Integer.MAX_VALUE);
    }

    private static long keyFromCycle(final int cycle) {
    private static int keyFromCycle(final int cycle) {
        // no negative values because values must fit into the first few bits of the long
        long result = Math.abs(cycle);
        int result = Math.abs(cycle);
        result = result << 1; // shift so we can add the "sign" bit
        if ( cycle < 0 )
            result++; // negative cycles get the lower-most bit set
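The cycle encoding stores the magnitude shifted left one bit and uses the lowest bit as a sign flag, which formatKey above undoes. A round-trip check:

// Sketch: round trip of the cycle <-> key encoding used above.
public class CycleKeySketch {
    static int keyFromCycle(final int cycle) {
        int result = Math.abs(cycle) << 1; // make room for the "sign" bit
        if (cycle < 0)
            result++; // negative cycles get the lowest bit set
        return result;
    }

    static int cycleFromKey(final int key) { // mirrors formatKey above
        int cycle = key >> 1;
        if ((key & 1) != 0)
            cycle *= -1;
        return cycle;
    }

    public static void main(String[] args) {
        for (final int cycle : new int[]{-3, -1, 0, 1, 5}) {
            final int key = keyFromCycle(cycle);
            System.out.printf("cycle %d -> key %d -> cycle %d%n", cycle, key, cycleFromKey(key));
        }
    }
}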

@@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;

import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

/*

@@ -49,7 +48,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
        final byte[] baseDeletionQualities = read.getBaseDeletionQualities();

        for (int i = 0; i < baseQualities.length; i++) {
            values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i);
            values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i);
        }
    }

@@ -60,17 +59,12 @@ public class QualityScoreCovariate implements RequiredCovariate {
    }

    @Override
    public String formatKey(final long key) {
    public String formatKey(final int key) {
        return String.format("%d", key);
    }

    @Override
    public long longFromKey(final Object key) {
        return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key;
    public int keyFromValue(final Object value) {
        return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
    }

    @Override
    public int numberOfBits() {
        return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
    }
}
}
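numberOfBitsToRepresent lives in BQSRKeyManager, outside this diff; one plausible reading (an assumption, not the actual implementation) is the bit width of the argument:

// Assumption: a plausible stand-in for BQSRKeyManager.numberOfBitsToRepresent,
// shown only to make the numberOfBits() overrides above concrete.
public class BitWidthSketch {
    static int numberOfBitsToRepresent(final int value) {
        return 32 - Integer.numberOfLeadingZeros(value);
    }

    public static void main(String[] args) {
        System.out.println(numberOfBitsToRepresent(93)); // 7: values up to 93 fit in 7 bits
    }
}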

@@ -1,13 +1,14 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;

import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Class that encapsulates the information necessary for quality score quantization for BQSR

@@ -30,25 +31,17 @@ public class QuantizationInfo {
        this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals));
    }

    public QuantizationInfo(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
    public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
        final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
        for (int i = 0; i < qualHistogram.length; i++)
            qualHistogram[i] = 0L;

        Map<Long, RecalDatum> qualTable = null; // look for the quality score table
        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
            BQSRKeyManager keyManager = entry.getKey();
            if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
                qualTable = entry.getValue();
        }
        final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table

        if (qualTable == null)
            throw new ReviewedStingException("Could not find QualityScore table.");

        for (RecalDatum datum : qualTable.values()) {
            int empiricalQual = (int) Math.round(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
            long nObservations = datum.numObservations;
            qualHistogram[empiricalQual] += nObservations; // add the number of observations for every key
        for (final Object value : qualTable.getAllValues()) {
            final RecalDatum datum = (RecalDatum)value;
            final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
            qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
        }
        empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
        quantizeQualityScores(quantizationLevels);
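The constructor collapses the quality-score table into a histogram: each empirical quality bin accumulates the observation counts of the data that rounded to it. The same reduction on plain arrays, as a sketch:

// Sketch: the histogram construction above, reduced to plain arrays.
public class QualHistogramSketch {
    static long[] histogram(final int[] empiricalQuals, final long[] observations, final int maxQual) {
        final long[] hist = new long[maxQual + 1];
        for (int i = 0; i < empiricalQuals.length; i++)
            hist[empiricalQuals[i]] += observations[i]; // each datum adds its observation count to its bin
        return hist;
    }

    public static void main(String[] args) {
        final long[] h = histogram(new int[]{20, 30, 30}, new long[]{100, 50, 25}, 93);
        System.out.println(h[30]); // 75
    }
}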

@@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
package org.broadinstitute.sting.gatk.walkers.bqsr;

import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

/**
 * The object temporarily held by a read that describes all of its covariates.
 *

@@ -11,65 +9,56 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 * @since 2/8/12
 */
public class ReadCovariates {
    private final long[][] mismatchesKeySet;
    private final long[][] insertionsKeySet;
    private final long[][] deletionsKeySet;
    private final int[][][] keys;

    private int currentCovariateIndex = 0;

    public ReadCovariates(int readLength, int numberOfCovariates) {
        this.mismatchesKeySet = new long[readLength][numberOfCovariates];
        this.insertionsKeySet = new long[readLength][numberOfCovariates];
        this.deletionsKeySet = new long[readLength][numberOfCovariates];
    public ReadCovariates(final int readLength, final int numberOfCovariates) {
        keys = new int[EventType.values().length][readLength][numberOfCovariates];
    }

    public void setCovariateIndex(final int index) {
        currentCovariateIndex = index;
    }

    public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) {
        mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch;
        insertionsKeySet[readOffset][currentCovariateIndex] = insertion;
        deletionsKeySet[readOffset][currentCovariateIndex] = deletion;
    public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
        keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
        keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
        keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
    }

    public long[] getKeySet(final int readPosition, final EventType errorModel) {
        switch (errorModel) {
            case BASE_SUBSTITUTION:
                return getMismatchesKeySet(readPosition);
            case BASE_INSERTION:
                return getInsertionsKeySet(readPosition);
            case BASE_DELETION:
                return getDeletionsKeySet(readPosition);
            default:
                throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
        }
    public int[] getKeySet(final int readPosition, final EventType errorModel) {
        return keys[errorModel.index][readPosition];
    }

    public long[] getMismatchesKeySet(final int readPosition) {
        return mismatchesKeySet[readPosition];
    public int[][] getKeySet(final EventType errorModel) {
        return keys[errorModel.index];
    }

    public long[] getInsertionsKeySet(final int readPosition) {
        return insertionsKeySet[readPosition];
    public int[] getMismatchesKeySet(final int readPosition) {
        return keys[EventType.BASE_SUBSTITUTION.index][readPosition];
    }

    public long[] getDeletionsKeySet(final int readPosition) {
        return deletionsKeySet[readPosition];
    public int[] getInsertionsKeySet(final int readPosition) {
        return keys[EventType.BASE_INSERTION.index][readPosition];
    }

    public int[] getDeletionsKeySet(final int readPosition) {
        return keys[EventType.BASE_DELETION.index][readPosition];
    }

    /**
     * Testing routines
     */
    protected long[][] getMismatchesKeySet() {
        return mismatchesKeySet;
    protected int[][] getMismatchesKeySet() {
        return keys[EventType.BASE_SUBSTITUTION.index];
    }

    protected long[][] getInsertionsKeySet() {
        return insertionsKeySet;
    protected int[][] getInsertionsKeySet() {
        return keys[EventType.BASE_INSERTION.index];
    }

    protected long[][] getDeletionsKeySet() {
        return deletionsKeySet;
    protected int[][] getDeletionsKeySet() {
        return keys[EventType.BASE_DELETION.index];
    }
}
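The rework replaces three parallel long[][] matrices with a single int cube indexed by event type, read position, and covariate. A self-contained miniature (ordinal() stands in for the EventType.index field used above):

// Sketch: the keys cube of the reworked ReadCovariates, as a mini class.
public class ReadCovariatesSketch {
    enum EventType { BASE_SUBSTITUTION, BASE_INSERTION, BASE_DELETION }

    private final int[][][] keys; // [event][readPosition][covariate]
    private int currentCovariateIndex = 0;

    ReadCovariatesSketch(final int readLength, final int numberOfCovariates) {
        keys = new int[EventType.values().length][readLength][numberOfCovariates];
    }

    void setCovariateIndex(final int index) { currentCovariateIndex = index; }

    void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
        keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
        keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
        keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
    }

    int[] getKeySet(final int readPosition, final EventType errorModel) {
        return keys[errorModel.ordinal()][readPosition];
    }

    public static void main(String[] args) {
        final ReadCovariatesSketch rc = new ReadCovariatesSketch(2, 1);
        rc.setCovariateIndex(0);
        rc.addCovariate(30, 45, 45, 0); // e.g. keys for read position 0
        System.out.println(rc.getKeySet(0, EventType.BASE_SUBSTITUTION)[0]); // 30
    }
}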

@@ -40,9 +40,9 @@ import java.util.HashMap;

public class ReadGroupCovariate implements RequiredCovariate {

    private final HashMap<String, Long> readGroupLookupTable = new HashMap<String, Long>();
    private final HashMap<Long, String> readGroupReverseLookupTable = new HashMap<Long, String>();
    private long nextId = 0L;
    private final HashMap<String, Integer> readGroupLookupTable = new HashMap<String, Integer>();
    private final HashMap<Integer, String> readGroupReverseLookupTable = new HashMap<Integer, String>();
    private int nextId = 0;

    // Initialize any member variables using the command-line arguments passed to the walkers
    @Override

@@ -51,7 +51,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
    @Override
    public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
        final String readGroupId = readGroupValueFromRG(read.getReadGroup());
        final long key = keyForReadGroup(readGroupId);
        final int key = keyForReadGroup(readGroupId);

        final int l = read.getReadLength();
        for (int i = 0; i < l; i++)

@@ -64,21 +64,16 @@ public class ReadGroupCovariate implements RequiredCovariate {
    }

    @Override
    public String formatKey(final long key) {
    public String formatKey(final int key) {
        return readGroupReverseLookupTable.get(key);
    }

    @Override
    public long longFromKey(Object key) {
        return keyForReadGroup((String) key);
    public int keyFromValue(final Object value) {
        return keyForReadGroup((String) value);
    }

    @Override
    public int numberOfBits() {
        return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE);
    }

    private long keyForReadGroup(final String readGroupId) {
    private int keyForReadGroup(final String readGroupId) {
        if (!readGroupLookupTable.containsKey(readGroupId)) {
            readGroupLookupTable.put(readGroupId, nextId);
            readGroupReverseLookupTable.put(nextId, readGroupId);
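keyForReadGroup is a plain interning scheme: the first sighting of a read group id claims the next int, and a reverse table serves formatKey. The hunk is cut off above, so the sketch below completes the pattern as a guess:

import java.util.HashMap;

// Sketch: the read-group interning pattern above, completed as a guess
// (the diff hunk ends before the increment/return).
public class ReadGroupInterner {
    private final HashMap<String, Integer> lookup = new HashMap<String, Integer>();
    private final HashMap<Integer, String> reverse = new HashMap<Integer, String>();
    private int nextId = 0;

    public int keyFor(final String readGroupId) {
        if (!lookup.containsKey(readGroupId)) {
            lookup.put(readGroupId, nextId);
            reverse.put(nextId, readGroupId);
            nextId++; // assumption: the counter advances after registration
        }
        return lookup.get(readGroupId);
    }

    public String format(final int key) { return reverse.get(key); }

    public static void main(String[] args) {
        final ReadGroupInterner interner = new ReadGroupInterner();
        System.out.println(interner.keyFor("rg1")); // 0
        System.out.println(interner.keyFor("rg2")); // 1
        System.out.println(interner.keyFor("rg1")); // 0 again
        System.out.println(interner.format(1));     // rg2
    }
}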

@@ -32,11 +32,13 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

@@ -82,6 +84,14 @@ public class RecalDataManager {

    private static final String SCRIPT_FILE = "BQSR.R";

    private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
    private static final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
    private static final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
    private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
    private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
    private static final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
    private static final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");


    public enum SOLID_RECAL_MODE {
        /**
@@ -141,30 +151,6 @@ public class RecalDataManager {
        }
    }


    /**
     * Initializes the recalibration table -> key manager map
     *
     * @param requiredCovariates list of required covariates (in order)
     * @param optionalCovariates list of optional covariates (in order)
     * @return a map with each key manager and its corresponding recalibration table properly initialized
     */
    public static LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
        final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
        final ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
        final ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
        for (Covariate covariate : requiredCovariates) {
            requiredCovariatesToAdd.add(covariate);
            final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
            final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing its corresponding key manager
            tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
        }
        final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
        final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing its corresponding key manager
        tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
        return tablesAndKeysMap;
    }

    /**
     * Generates two lists: required covariates and optional covariates based on the user's requests.
     *
@@ -223,42 +209,29 @@ public class RecalDataManager {
        logger.info("");
    }

    private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
    private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
        List<GATKReportTable> result = new LinkedList<GATKReportTable>();
        int tableIndex = 0;

        final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
        final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
        final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
        final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
        final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
        final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
        final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
        final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
        for (final Covariate covariate : requestedCovariates)
            covariateNameMap.put(covariate, parseCovariateName(covariate));

        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
            final BQSRKeyManager keyManager = entry.getKey();
            final Map<Long, RecalDatum> recalTable = entry.getValue();
        for (final RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {

            final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.

            final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
            final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table

            final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names

            for (final Covariate covariate : requiredList) {
                final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
                columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
            }

            if (optionalList.length > 0) {
                columnNames.add(covariateValue);
                columnNames.add(covariateName);
            final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
            columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
            if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
                columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
                if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
                    columnNames.add(covariateValue);
                    columnNames.add(covariateName);
                }
            }

            columnNames.add(eventType); // the order of these column names is important here
            columnNames.add(empiricalQuality);
            if (isReadGroupTable)
            if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
                columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
            columnNames.add(nObservations);
            columnNames.add(nErrors);

@@ -269,42 +242,59 @@ public class RecalDataManager {

            int rowIndex = 0;

            for (Map.Entry<Long, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
                final Long bitSetKey = recalTableEntry.getKey();
                final Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
                final Iterator<Pair<String, String>> iterator = columnNames.iterator();
                for (final Object key : keyManager.keySetFrom(bitSetKey)) {
                    final String columnName = iterator.next().getFirst();
                    columnData.put(columnName, key);
                }
                final RecalDatum datum = recalTableEntry.getValue();
                columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality());
                if (isReadGroupTable)
                    columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
                columnData.put(iterator.next().getFirst(), datum.numObservations);
                columnData.put(iterator.next().getFirst(), datum.numMismatches);
            final NestedHashMap table = recalibrationTables.getTable(type);
            for (final NestedHashMap.Leaf row : table.getAllLeaves()) {
                final RecalDatum datum = (RecalDatum)row.value;
                final List<Object> keys = row.keys;

                for (final Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
                    final String columnName = dataEntry.getKey();
                    final Object value = dataEntry.getValue();
                    reportTable.set(rowIndex, columnName, value.toString());
                int columnIndex = 0;
                setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey((Integer)keys.get(columnIndex++)));
                if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
                    setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey((Integer) keys.get(columnIndex++)));
                    if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
                        final int covariateIndex = (Integer)keys.get(columnIndex);
                        final Covariate covariate = requestedCovariates[2 + covariateIndex];
                        final int covariateKey = (Integer)keys.get(columnIndex+1);

                        setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey));
                        setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate));
                    }
                }

                final EventType event = EventType.eventFrom((Integer)keys.get(columnIndex));
                setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event);

                setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
                if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
                    setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
                setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
                setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);

                rowIndex++;
            }
            result.add(reportTable);
        }

        return result;
    }

    public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
        outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
    private static String parseCovariateName(final Covariate covariate) {
        return covariate.getClass().getSimpleName().split("Covariate")[0];
    }

    public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
        outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
    private static void setReportTableCell(final GATKReportTable reportTable, final int rowIndex, final String columnName, final Object value) {
        reportTable.set(rowIndex, columnName, value.toString());
    }

    private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
    public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
        outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
    }

    public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
        outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
    }

    private static void outputRecalibrationReport(final GATKReportTable argumentTable, final GATKReportTable quantizationTable, final List<GATKReportTable> recalTables, final PrintStream outputFile) {
        final GATKReport report = new GATKReport();
        report.addTable(argumentTable);
        report.addTable(quantizationTable);

@@ -340,108 +330,87 @@ public class RecalDataManager {

    }

    public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, boolean keepIntermediates) {
    public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
        final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
        writeCSV(files.getFirst(), original, "ORIGINAL", true);
        writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
        outputRecalibrationPlot(files, keepIntermediates);
    }

    public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> recalibrated, boolean keepIntermediates) {
    public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
        final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
        writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true);
        writeCSV(files.getFirst(), original, "ORIGINAL", false);
        writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
        writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
        outputRecalibrationPlot(files, keepIntermediates);
    }

    private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
        final int QUALITY_SCORE_COVARIATE_INDEX = 1;
        final Map<Long, RecalDatum> deltaTable = new HashMap<Long, RecalDatum>();
        BQSRKeyManager deltaKeyManager = null;
    private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
        final NestedHashMap deltaTable = new NestedHashMap();

        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
            final BQSRKeyManager keyManager = tableEntry.getKey();

            if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
                // create a key manager for the delta table
                final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
                final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
                optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
                optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
                deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
            }
        // add the quality score table to the delta table
        final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
        for (final NestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
            final List<Object> newCovs = new ArrayList<Object>(4);
            newCovs.add(leaf.keys.get(0));
            newCovs.add(requestedCovariates.length); // replace the covariate name with an arbitrary (unused) index for QualityScore
            newCovs.add(leaf.keys.get(1));
            newCovs.add(leaf.keys.get(2));
            addToDeltaTable(deltaTable, newCovs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
        }

        if (deltaKeyManager == null)
            throw new ReviewedStingException("Couldn't find the covariates table");

        boolean readyToPrint = false;
        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
            final BQSRKeyManager keyManager = tableEntry.getKey();

            if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
                final Map<Long, RecalDatum> table = tableEntry.getValue();

                // add the quality score table to the delta table
                for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
                    final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)

                    final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
                    final List<Object> newCovs = new ArrayList<Object>(4);
                    newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score
                    newCovs.add(1, covs.get(1));
                    newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
                    newCovs.add(3, covs.get(2));
                    final long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table
                    addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
                }
            }

            else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
                final Map<Long, RecalDatum> table = tableEntry.getValue();

                // add the optional covariates to the delta table
                for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
                    final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)

                    final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
                    covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
                    final long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table
                    addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
                }
                readyToPrint = true;
            }

            // output the csv file
            if (readyToPrint) {

                if (printHeader) {
                    final List<String> header = new LinkedList<String>();
                    header.add("ReadGroup");
                    header.add("CovariateValue");
                    header.add("CovariateName");
                    header.add("EventType");
                    header.add("Observations");
                    header.add("Errors");
                    header.add("EmpiricalQuality");
                    header.add("AverageReportedQuality");
                    header.add("Accuracy");
                    header.add("Recalibration");
                    deltaTableFile.println(Utils.join(",", header));
                }

                // print each data line
                for (final Map.Entry<Long, RecalDatum> deltaEntry : deltaTable.entrySet()) {
                    final List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
                    final RecalDatum deltaDatum = deltaEntry.getValue();
                    deltaTableFile.print(Utils.join(",", deltaKeys));
                    deltaTableFile.print("," + deltaDatum.stringForCSV());
                    deltaTableFile.println("," + recalibrationMode);
                }

            }

        // add the optional covariates to the delta table
        final NestedHashMap covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE);
        for (final NestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
            final List<Object> covs = new ArrayList<Object>(leaf.keys);
            covs.remove(1); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
            addToDeltaTable(deltaTable, covs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
        }

        // output the csv file
        if (printHeader) {
            final List<String> header = new LinkedList<String>();
            header.add("ReadGroup");
            header.add("CovariateValue");
            header.add("CovariateName");
            header.add("EventType");
            header.add("Observations");
            header.add("Errors");
            header.add("EmpiricalQuality");
            header.add("AverageReportedQuality");
            header.add("Accuracy");
            header.add("Recalibration");
            deltaTableFile.println(Utils.join(",", header));
        }

        final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
        for (final Covariate covariate : requestedCovariates)
            covariateNameMap.put(covariate, parseCovariateName(covariate));

        // print each data line
        for (final NestedHashMap.Leaf leaf : deltaTable.getAllLeaves()) {
            final List<Object> deltaKeys = generateValuesFromKeys(leaf.keys, requestedCovariates, covariateNameMap);
            final RecalDatum deltaDatum = (RecalDatum)leaf.value;
            deltaTableFile.print(Utils.join(",", deltaKeys));
            deltaTableFile.print("," + deltaDatum.stringForCSV());
            deltaTableFile.println("," + recalibrationMode);
        }
    }

    private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
        final List<Object> values = new ArrayList<Object>(4);
        values.add(covariates[0].formatKey((Integer)keys.get(0)));

        // TODO -- create static final variables to hold the indexes of the RG, qual, cov ID, etc.

        final int covariateIndex = (Integer)keys.get(1);
        final Covariate covariate = covariateIndex == covariates.length ? covariates[1] : covariates[2 + covariateIndex];
        final int covariateKey = (Integer)keys.get(2);
        values.add(covariate.formatKey(covariateKey));
        values.add(covariateNameMap.get(covariate));

        final EventType event = EventType.eventFrom((Integer)keys.get(3));
        values.add(event);

        return values;
    }

    /**
@@ -453,15 +422,14 @@ public class RecalDataManager {
     * @param deltaKey the key to the table
     * @param recalDatum the recal datum to combine with the accuracyDatum element in the table
     */
    private static void addToDeltaTable(Map<Long, RecalDatum> deltaTable, Long deltaKey, RecalDatum recalDatum) {
        final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
    private static void addToDeltaTable(final NestedHashMap deltaTable, final Object[] deltaKey, final RecalDatum recalDatum) {
        final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
        if (deltaDatum == null)
            deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the current datum
            deltaTable.put(new RecalDatum(recalDatum), deltaKey); // if we don't have a key yet, create a new one with the same values as the current datum
        else
            deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
    }
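addToDeltaTable is a combine-or-insert reduction; note that the new NestedHashMap.put takes the value first and the key parts second, as shown above. The same reduction against a plain Map, for comparison:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch: the combine-or-insert reduction of addToDeltaTable, with a plain
// Map keyed by the key-part list standing in for NestedHashMap.
public class DeltaTableSketch {
    static class Datum { // minimal stand-in for RecalDatum
        long observations;
        Datum(final long observations) { this.observations = observations; }
        void combine(final Datum other) { observations += other.observations; }
    }

    static void addToDeltaTable(final Map<List<Object>, Datum> deltaTable, final Object[] deltaKey, final Datum datum) {
        final List<Object> key = Arrays.asList(deltaKey);
        final Datum existing = deltaTable.get(key);
        if (existing == null)
            deltaTable.put(key, new Datum(datum.observations)); // first sighting: copy in
        else
            existing.combine(datum); // otherwise aggregate
    }

    public static void main(String[] args) {
        final Map<List<Object>, Datum> table = new HashMap<List<Object>, Datum>();
        addToDeltaTable(table, new Object[]{"rg1", 30}, new Datum(10));
        addToDeltaTable(table, new Object[]{"rg1", 30}, new Datum(5));
        System.out.println(table.get(Arrays.asList("rg1", 30)).observations); // 15
    }
}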

    /**
     * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
     *

@@ -627,13 +595,13 @@ public class RecalDataManager {
     *
     * @param read The read for which to compute covariate values.
     * @param requestedCovariates The list of requested covariates.
     * @param readCovariates The object to store the covariate values
     * @param resultsStorage The object to store the covariate values
     */
    public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
    public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates resultsStorage) {
        // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
        for (int i = 0; i < requestedCovariates.length; i++) {
            readCovariates.setCovariateIndex(i);
            requestedCovariates[i].recordValues(read, readCovariates);
            resultsStorage.setCovariateIndex(i);
            requestedCovariates[i].recordValues(read, resultsStorage);
        }
    }
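A typical driver for this loop allocates one ReadCovariates per read and lets each covariate fill its column; the wiring below is taken from this diff, while the read and covariate array are assumed to come from the walker's setup:

    // Sketch: assumed caller wiring for computeCovariates (read/covariate setup is elsewhere).
    public static ReadCovariates covariatesFor(final GATKSAMRecord read, final Covariate[] requestedCovariates) {
        final ReadCovariates resultsStorage = new ReadCovariates(read.getReadLength(), requestedCovariates.length);
        RecalDataManager.computeCovariates(read, requestedCovariates, resultsStorage);
        return resultsStorage; // per-position int keys, ready for getKeySet(...) lookups
    }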

@@ -113,8 +113,7 @@ public class RecalDatum extends Datum {
        return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
    }

    private double calcExpectedErrors() {
        return (double) this.numObservations * qualToErrorProb(estimatedQReported);
    }

@@ -3,8 +3,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

import java.io.File;
import java.io.PrintStream;

@@ -18,14 +19,19 @@ import java.util.*;
 */
public class RecalibrationReport {
    private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
    private final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
    private final RecalibrationTables recalibrationTables; // quick access reference to the tables
    private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
    private final HashMap<String, Integer> optionalCovariateIndexes;

    private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
    private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter

    private final Object[] tempRGarray = new Object[2];
    private final Object[] tempQUALarray = new Object[3];
    private final Object[] tempCOVarray = new Object[5];

    public RecalibrationReport(final File RECAL_FILE) {
        GATKReport report = new GATKReport(RECAL_FILE);
        final GATKReport report = new GATKReport(RECAL_FILE);

        argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
        RAC = initializeArgumentCollectionTable(argumentTable);

@@ -37,52 +43,39 @@ public class RecalibrationReport {
        ArrayList<Covariate> requiredCovariates = covariates.getFirst();
        ArrayList<Covariate> optionalCovariates = covariates.getSecond();
        requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
        optionalCovariateIndexes = new HashMap<String, Integer>(optionalCovariates.size());
        int covariateIndex = 0;
        for (final Covariate covariate : requiredCovariates)
            requestedCovariates[covariateIndex++] = covariate;
        for (final Covariate covariate : optionalCovariates)
            requestedCovariates[covariateIndex++] = covariate;
        for (final Covariate covariate : optionalCovariates) {
            requestedCovariates[covariateIndex] = covariate;
            final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
            optionalCovariateIndexes.put(covariateName, covariateIndex-2);
            covariateIndex++;
        }

        for (Covariate cov : requestedCovariates)
            cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection

        keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
        ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
        ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
        for (Covariate covariate : requiredCovariates) {
            requiredCovariatesToAdd.add(covariate);
            final Map<Long, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
            final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing its corresponding key manager
        final GATKReportTable rgReportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
        final NestedHashMap rgTable = parseReadGroupTable(rgReportTable);

            final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
            final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
            if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
                final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
                table = parseReadGroupTable(keyManager, reportTable);
            }
            else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
                final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
                table = parseQualityScoreTable(keyManager, reportTable);
            }
            else
                throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
        final GATKReportTable qualReportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
        final NestedHashMap qualTable = parseQualityScoreTable(qualReportTable);

            keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map
        }
        final GATKReportTable covReportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
        final NestedHashMap covTable = parseAllCovariatesTable(covReportTable);


        final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing its corresponding key manager
        final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
        final Map<Long, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
        keysAndTablesMap.put(keyManager, table);
        recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
    }

    protected RecalibrationReport(final QuantizationInfo quantizationInfo, final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
    protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
        this.quantizationInfo = quantizationInfo;
        this.keysAndTablesMap = keysAndTablesMap;
        this.recalibrationTables = recalibrationTables;
        this.argumentTable = argumentTable;
        this.RAC = RAC;
        this.requestedCovariates = null;
        this.optionalCovariateIndexes = null;
    }

    /**
@ -98,29 +91,20 @@ public class RecalibrationReport {
|
|||
*
* @param other the recalibration report to combine with this one
*/
public void combine(RecalibrationReport other) {
Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
public void combine(final RecalibrationReport other) {

for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> thisEntry = thisIterator.next();
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
final NestedHashMap myTable = recalibrationTables.getTable(type);
final NestedHashMap otherTable = other.recalibrationTables.getTable(type);

final Map<Long, RecalDatum> thisTable = thisEntry.getValue();
final BQSRKeyManager thisKeyManager = thisEntry.getKey();
final BQSRKeyManager otherKeyManager = otherEntry.getKey();
for (final NestedHashMap.Leaf row : otherTable.getAllLeaves()) {
final RecalDatum myDatum = (RecalDatum)myTable.get(row.keys);

for (Map.Entry<Long, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
final RecalDatum otherDatum = otherTableEntry.getValue();
final Long otherBitKey = otherTableEntry.getKey();
final List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);

final long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray());
final RecalDatum thisDatum = thisTable.get(thisKey);

if (thisDatum == null)
thisTable.put(thisKey, otherDatum);
if (myDatum == null)
myTable.put(row.value, row.keys);
else
thisDatum.combine(otherDatum);
}
myDatum.combine((RecalDatum)row.value);
}
}
}
}
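As a gather-step illustration, here is a minimal, self-contained sketch of the merge pattern used above: for every entry in the other report's table, either insert the datum under the corresponding key or combine it with the one already present. Datum and the String keys are hypothetical stand-ins for RecalDatum and the covariate key sets, not the GATK classes.

import java.util.HashMap;
import java.util.Map;

class TableMergeSketch {
    // Hypothetical stand-in for RecalDatum: counts that add under combine().
    static class Datum {
        long observations, errors;
        Datum(long obs, long err) { observations = obs; errors = err; }
        void combine(Datum other) { observations += other.observations; errors += other.errors; }
    }

    // Merge 'other' into 'mine': insert missing keys, combine existing ones.
    static void combineTables(Map<String, Datum> mine, Map<String, Datum> other) {
        for (Map.Entry<String, Datum> e : other.entrySet()) {
            Datum existing = mine.get(e.getKey());
            if (existing == null)
                mine.put(e.getKey(), e.getValue());
            else
                existing.combine(e.getValue());
        }
    }

    public static void main(String[] args) {
        Map<String, Datum> a = new HashMap<String, Datum>();
        Map<String, Datum> b = new HashMap<String, Datum>();
        a.put("rg1:M", new Datum(100, 3));
        b.put("rg1:M", new Datum(50, 2));
        b.put("rg2:M", new Datum(10, 1));
        combineTables(a, b);
        System.out.println(a.get("rg1:M").observations); // 150
        System.out.println(a.size()); // 2
    }
}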

@@ -128,8 +112,8 @@ public class RecalibrationReport {
return quantizationInfo;
}

public LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> getKeysAndTablesMap() {
return keysAndTablesMap;
public RecalibrationTables getRecalibrationTables() {
return recalibrationTables;
}

public Covariate[] getRequestedCovariates() {

@@ -139,82 +123,87 @@ public class RecalibrationReport {
/**
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
*
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<Long, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false);
private NestedHashMap parseAllCovariatesTable(final GATKReportTable reportTable) {
final NestedHashMap result = new NestedHashMap();

for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
final int covIndex = optionalCovariateIndexes.get(covName);
tempCOVarray[2] = covIndex;
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
tempCOVarray[3] = requestedCovariates[covIndex + 2].keyFromValue(covValue);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
tempCOVarray[4] = event.index;

result.put(getRecalDatum(reportTable, i, false), tempCOVarray);
}

return result;
}

/**
*
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<Long, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false);
private NestedHashMap parseQualityScoreTable(final GATKReportTable reportTable) {
final NestedHashMap result = new NestedHashMap();

for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
tempQUALarray[2] = event.index;

result.put(getRecalDatum(reportTable, i, false), tempQUALarray);
}

return result;
}

/**
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
*
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<Long, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, true);
}

/**
* Shared parsing functionality for all tables.
*
* @param keyManager the key manager for this table
* @param reportTable the GATKReport table containing data for this table
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
*/
private Map<Long, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
final Map<Long, RecalDatum> result = new HashMap<Long, RecalDatum>(reportTable.getNumRows()*2);
private NestedHashMap parseReadGroupTable(final GATKReportTable reportTable) {
final NestedHashMap result = new NestedHashMap();

for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
final int nKeys = columnNamesOrderedList.size();
final Object [] keySet = new Object[nKeys];
for (int j = 0; j < nKeys; j++)
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type; see below)
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
final long bitKey = keyManager.longFromKey(keySet);
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
tempRGarray[1] = event.index;

final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);

final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQReported column only exists in the ReadGroup table
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table

final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
result.put(bitKey, recalDatum);
result.put(getRecalDatum(reportTable, i, true), tempRGarray);
}

return result;
}

private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);

final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQReported column only exists in the ReadGroup table
(Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table

return new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
}

/**
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
*

@@ -308,55 +297,21 @@ public class RecalibrationReport {
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
*/
public void calculateEmpiricalAndQuantizedQualities() {
for (Map<Long, RecalDatum> table : keysAndTablesMap.values())
for (RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality();
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
final NestedHashMap table = recalibrationTables.getTable(type);
for (final Object value : table.getAllValues()) {
((RecalDatum)value).calcCombinedEmpiricalQuality();
}
}

quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
}

public void output(PrintStream output) {
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output);
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
}

public RecalibrationArgumentCollection getRAC() {
return RAC;
}

@Override
public boolean equals(Object o) {
if (!(o instanceof RecalibrationReport))
return false;
RecalibrationReport other = (RecalibrationReport) o;
if (this == o)
return true;
return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap);
}

private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t2) {
if (t1.size() != t2.size())
return false;

final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t2Iterator = t2.entrySet().iterator();

while (t1Iterator.hasNext() && t2Iterator.hasNext()) {
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t1MapEntry = t1Iterator.next();
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t2MapEntry = t2Iterator.next();

if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey())))
return false;

final Map<Long, RecalDatum> table2 = t2MapEntry.getValue();
for (Map.Entry<Long, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
final Long t1Key = t1TableEntry.getKey();
if (!table2.containsKey(t1Key))
return false;
final RecalDatum t1Datum = t1TableEntry.getValue();
if (!t1Datum.equals(table2.get(t1Key)))
return false;
}
}
return true;
}
}

@@ -266,13 +266,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
alleles.add(refAllele);
alleles.add(SYMBOLIC_ALLELE);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);

vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds), true)));

attributes.put(VCFConstants.END_KEY, interval.getStop());
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
attributes.put(ThresHolder.AVG_INTERVAL_DP_KEY, stats.averageCoverage());

vcb = vcb.attributes(attributes);
if (debug) {

@@ -282,7 +282,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
final GenotypeBuilder gb = new GenotypeBuilder(sample);

SampleStatistics sampleStat = stats.getSample(sample);
gb.DP((int)sampleStat.averageCoverage());
gb.attribute(ThresHolder.AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage());
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));

@@ -290,7 +290,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
if (debug) {
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
}
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds), false));

genotypes.add(gb.make());
}

@@ -307,11 +307,12 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
* @param statuses the set of statuses to be converted
* @return a matching set of strings
*/
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
private List<String> statusesToStrings(Set<CallableStatus> statuses, final boolean includePASS) {
List<String> output = new ArrayList<String>(statuses.size());

for (CallableStatus status : statuses)
output.add(status.name());
if ( includePASS || status != CallableStatus.PASS ) // adding PASS here would mark a genotype as filtered
output.add(status.name());

return output;
}
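The new includePASS flag exists because site filters and genotype filters behave differently: an entry in a genotype's FT field marks that genotype as filtered, so PASS must be omitted there, while the site-level FILTER column treats PASS as the normal state. A small self-contained sketch of the same selection logic (the Status enum is an illustrative stand-in for CallableStatus):

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;

class FilterStringsSketch {
    // Illustrative stand-in for a subset of CallableStatus.
    enum Status { PASS, LOW_COVERAGE, POOR_QUALITY }

    // PASS is kept for the site FILTER column but dropped for genotype FT fields,
    // where any entry at all means "this genotype is filtered".
    static List<String> statusesToStrings(Set<Status> statuses, boolean includePASS) {
        List<String> output = new ArrayList<String>(statuses.size());
        for (Status status : statuses)
            if (includePASS || status != Status.PASS)
                output.add(status.name());
        return output;
    }

    public static void main(String[] args) {
        Set<Status> s = EnumSet.of(Status.PASS, Status.LOW_COVERAGE);
        System.out.println(statusesToStrings(s, true));  // [PASS, LOW_COVERAGE]  (site filters)
        System.out.println(statusesToStrings(s, false)); // [LOW_COVERAGE]        (genotype filters)
    }
}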

@@ -31,6 +31,7 @@ import java.util.HashSet;
import java.util.Set;

class ThresHolder {
public static final String AVG_INTERVAL_DP_KEY = "AVG_INTERVAL_DP";
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);

private final int minimumBaseQuality;

@@ -129,12 +130,13 @@ class ThresHolder {

// INFO fields for overall data
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth at each locus divided by the interval size."));
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth at each locus divided by the interval size."));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));

// FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth at each locus divided by the interval size."));
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth at each locus divided by the interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth distribution."));

@@ -64,9 +64,10 @@ public class VCFDiffableReader implements DiffableReader {
root.add("VERSION", version);
br.close();

// must be read as state is stored in reader itself
AbstractVCFCodec.disableOnTheFlyModifications();
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
final VCFCodec vcfCodec = new VCFCodec();
vcfCodec.disableOnTheFlyModifications(); // must be read as state is stored in reader itself

FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), vcfCodec, false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
String key = headerLine.getKey();

@@ -176,7 +176,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString()));

if ( genotypeFilterExps.size() > 0 )
hInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));

if ( mask.isBound() ) {
hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask"));

@@ -187,6 +187,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// the annotation engine
private VariantAnnotatorEngine annotationEngine;

private Set<String> samples;

// enable deletions in the pileup
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }

@@ -231,7 +233,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");

// get all of the unique sample names
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());

// initialize the verbose writer
if ( verboseWriter != null )

@@ -298,7 +300,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
* @return the VariantCallContext object
*/
public List<VariantCallContext> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext);
return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext, samples);
}

public UGStatistics reduceInit() { return new UGStatistics(); }

@@ -140,14 +140,39 @@ public class UnifiedGenotyperEngine {
}

/**
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
* @see #calculateLikelihoodsAndGenotypes(org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker, org.broadinstitute.sting.gatk.contexts.ReferenceContext, org.broadinstitute.sting.gatk.contexts.AlignmentContext, java.util.Set)
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @return the VariantCallContext object
* same as the full call but with allSamples == null
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @return the VariantCallContext object
*/
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(final RefMetaDataTracker tracker,
final ReferenceContext refContext,
final AlignmentContext rawContext) {
return calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext, null);
}

/**
* Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
*
* If allSamples != null, then the output variantCallContext is guaranteed to contain a genotype
* for every sample in allSamples. If it's null there's no such guarantee. Providing this
* argument is critical when the resulting calls will be written to a VCF file.
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
* @return the VariantCallContext object
*/
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(final RefMetaDataTracker tracker,
final ReferenceContext refContext,
final AlignmentContext rawContext,
final Set<String> allSamples) {
final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);

final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);

@@ -168,7 +193,23 @@ public class UnifiedGenotyperEngine {
}
}

return results;
return addMissingSamples(results, allSamples);
}

private List<VariantCallContext> addMissingSamples(final List<VariantCallContext> calls, final Set<String> allSamples) {
if ( calls.isEmpty() || allSamples == null ) return calls;

final List<VariantCallContext> withAllSamples = new ArrayList<VariantCallContext>(calls.size());
for ( final VariantCallContext call : calls ) {
if ( call == null )
withAllSamples.add(call);
else {
final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples);
withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit));
}
}

return withAllSamples;
}
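To see why threading allSamples through matters, here is a hedged, standalone sketch of the padding idea: the VCF header fixes the sample columns, so each record must carry a genotype (possibly a no-call) for every declared sample. Sample names are made up for illustration.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

class PadSamplesSketch {
    // Pad a per-record genotype map so it covers every sample the header declares.
    static Map<String, String> padWithNoCalls(Map<String, String> called, Set<String> allSamples) {
        Map<String, String> out = new LinkedHashMap<String, String>(called);
        for (String sample : allSamples)
            if (!out.containsKey(sample))
                out.put(sample, "./."); // VCF no-call genotype
        return out;
    }

    public static void main(String[] args) {
        Map<String, String> called = new LinkedHashMap<String, String>();
        called.put("NA12878", "0/1");
        Set<String> header = new LinkedHashSet<String>(Arrays.asList("NA12878", "NA12891", "NA12892"));
        // {NA12878=0/1, NA12891=./., NA12892=./.}
        System.out.println(padWithNoCalls(called, header));
    }
}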

/**

@@ -261,6 +261,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk

private UnifiedGenotyperEngine snpEngine;
private UnifiedGenotyperEngine indelEngine;
private Set<String> samples;

public static class CountedData {
private long nAltCalledAlt = 0L;

@@ -307,7 +308,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
// Initialize VCF header
if (vcfWriter != null) {
Map<String, VCFHeader> header = VCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
Set<String> samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), logger);
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));

@@ -174,17 +174,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {

/** Optimization to strip out genotypes before merging if we are doing a sites_only output */
private boolean sitesOnlyVCF = false;
private Set<String> samples;

public void initialize() {
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit());

if ( vcfWriter instanceof VariantContextWriterStub) {
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES);
if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance");
} else
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");

if ( PRIORITY_STRING == null ) {
PRIORITY_STRING = Utils.join(",", vcfRods.keySet());
logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
}

validateAnnotateUnionArguments();
Set<String> samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);

if ( SET_KEY.toLowerCase().equals("null") )
SET_KEY = null;

@@ -194,15 +201,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
if ( !ASSUME_IDENTICAL_SAMPLES )
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
VCFHeader vcfHeader = new VCFHeader(headerLines, sitesOnlyVCF ? Collections.<String>emptySet() : samples);
VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
vcfWriter.writeHeader(vcfHeader);

if ( vcfWriter instanceof VariantContextWriterStub) {
sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES);
if ( sitesOnlyVCF ) logger.info("Pre-stripping genotypes for performance");
} else
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
}

private void validateAnnotateUnionArguments() {

@@ -296,7 +297,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
VariantContextUtils.calculateChromosomeCounts(builder, false);
if ( minimalVCF )
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
vcfWriter.add(builder.make());
vcfWriter.add(VariantContextUtils.addMissingSamples(builder.make(), samples));
}

return vcs.isEmpty() ? 0 : 1;

@@ -510,7 +510,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for (VariantContext vc : vcs) {
// an option for performance testing only
if ( fullyDecode )
vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
vc = vc.fullyDecode(vcfRods.get(vc.getSource()), getToolkit().lenientVCFProcessing() );

// an option for performance testing only
if ( forceGenotypesDecode ) {

@@ -108,6 +108,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {

private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
private boolean wroteHeader = false;
private Set<String> samples;

// for dealing with indels in hapmap
CloseableIterator<GATKFeature> dbsnpIterator = null;

@@ -228,7 +229,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
}
}

Set<String> samples = new LinkedHashSet<String>();
samples = new LinkedHashSet<String>();
if ( sampleName != null ) {
samples.add(sampleName);
} else {

@@ -252,6 +253,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
}

vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vc = VariantContextUtils.addMissingSamples(vc, samples);
vcfwriter.add(vc);
}

@@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;

import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;

import java.util.Arrays;

/**
* BaseUtils contains some basic utilities for manipulating nucleotides.
*/

@@ -47,6 +49,20 @@ public class BaseUtils {
public boolean sameBase(int i) { return index == i; }
}

static private final int[] baseIndexMap = new int[256];
static {
Arrays.fill(baseIndexMap, -1);
baseIndexMap['A'] = 0;
baseIndexMap['a'] = 0;
baseIndexMap['*'] = 0; // the wildcard character counts as an A
baseIndexMap['C'] = 1;
baseIndexMap['c'] = 1;
baseIndexMap['G'] = 2;
baseIndexMap['g'] = 2;
baseIndexMap['T'] = 3;
baseIndexMap['t'] = 3;
}
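The new lookup table trades the old switch for a single array read: every slot that the static initializer does not set stays -1, so unrecognized characters still map to "no index". A standalone sketch of the same idea (note that indexing with a byte assumes the non-negative ASCII range):

import java.util.Arrays;

class BaseIndexSketch {
    static final int[] INDEX = new int[256];
    static {
        Arrays.fill(INDEX, -1);
        INDEX['A'] = INDEX['a'] = INDEX['*'] = 0; // '*' wildcard counts as an A
        INDEX['C'] = INDEX['c'] = 1;
        INDEX['G'] = INDEX['g'] = 2;
        INDEX['T'] = INDEX['t'] = 3;
    }

    static int baseToIndex(byte base) {
        return INDEX[base]; // ASCII bases are positive, so this is a safe index
    }

    public static void main(String[] args) {
        System.out.println(baseToIndex((byte) 'g')); // 2
        System.out.println(baseToIndex((byte) 'N')); // -1
    }
}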

// todo -- fix me (enums?)
public static final byte DELETION_INDEX = 4;
public static final byte NO_CALL_INDEX = 5; // (this is 'N')

@@ -182,27 +198,7 @@ public class BaseUtils {
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
static public int simpleBaseToBaseIndex(byte base) {
switch (base) {
case '*': // the wildcard character counts as an A
case 'A':
case 'a':
return 0;

case 'C':
case 'c':
return 1;

case 'G':
case 'g':
return 2;

case 'T':
case 't':
return 3;

default:
return -1;
}
return baseIndexMap[base];
}

/**

@@ -213,27 +209,7 @@ public class BaseUtils {
*/
@Deprecated
static public int simpleBaseToBaseIndex(char base) {
switch (base) {
case '*': // the wildcard character counts as an A
case 'A':
case 'a':
return 0;

case 'C':
case 'c':
return 1;

case 'G':
case 'g':
return 2;

case 'T':
case 't':
return 3;

default:
return -1;
}
return baseIndexMap[base];
}

static public int extendedBaseToBaseIndex(byte base) {

@@ -284,11 +260,6 @@ public class BaseUtils {
}
}

@Deprecated
static public char baseIndexToSimpleBaseAsChar(int baseIndex) {
return (char) baseIndexToSimpleBase(baseIndex);
}

/**
* Converts a base index to a base index representing its cross-talk partner
*

@@ -101,15 +101,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD

@Override
public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
recordNo++;
final VariantContextBuilder builder = new VariantContextBuilder();

final int sitesBlockSize = decoder.readBlockSize(inputStream);
final int genotypeBlockSize = decoder.readBlockSize(inputStream); // necessary because it's in the stream
decoder.readNextBlock(sitesBlockSize, inputStream);
decodeSiteLoc(builder);

return builder.fullyDecoded(true).make();
return decode(inputStream);
}

@Override

@@ -136,6 +136,10 @@ public final class BCF2Decoder {

public final Object decodeTypedValue(final byte typeDescriptor) {
final int size = decodeNumberOfElements(typeDescriptor);
return decodeTypedValue(typeDescriptor, size);
}

public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);

assert size >= 0;

@@ -285,8 +289,7 @@ public final class BCF2Decoder {
}
}

public final int[] decodeIntArray(final byte typeDescriptor) {
final int size = decodeNumberOfElements(typeDescriptor);
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
return decodeIntArray(size, type, null);
}

@@ -104,19 +104,17 @@ public class BCF2GenotypeFieldDecoders {
final String field,
final BCF2Decoder decoder,
final byte typeDescriptor,
final int numElements,
final GenotypeBuilder[] gbs);
}

private class GTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
// we have to do a bit of low-level processing here as we want to know the size upfront
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);

if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
else {
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs);
}
}

@@ -218,7 +216,7 @@ public class BCF2GenotypeFieldDecoders {

private class DPDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.DP(decoder.decodeInt(typeDescriptor, -1));

@@ -228,7 +226,7 @@ public class BCF2GenotypeFieldDecoders {

private class GQDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.GQ(decoder.decodeInt(typeDescriptor, -1));

@@ -238,27 +236,27 @@ public class BCF2GenotypeFieldDecoders {

private class ADDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.AD(decoder.decodeIntArray(typeDescriptor));
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
}
}
}

private class PLDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.PL(decoder.decodeIntArray(typeDescriptor));
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
}
}
}

private class GenericDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
if ( value != null ) { // don't add missing values
if ( value instanceof List && ((List)value).size() == 1) {
// todo -- I really hate this, and it suggests that the code isn't completely right

@@ -275,9 +273,9 @@ public class BCF2GenotypeFieldDecoders {

private class FTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
if ( value != null ) { // don't add missing values
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
}

@@ -77,9 +77,10 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {

// the type of each element
final byte typeDescriptor = decoder.readTypeDescriptor();
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
try {
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value");

@@ -32,10 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.*;
import java.util.*;

/**

@@ -200,17 +197,35 @@ public final class BCF2Utils {
* foo.vcf => foo.bcf
* foo.xxx => foo.xxx.bcf
*
* If the resulting BCF file cannot be written, return null. Happens
* when vcfFile = /dev/null for example
*
* @param vcfFile
* @return
* @return the BCF
*/
@Requires("vcfFile != null")
@Ensures("result != null")
public static final File shadowBCF(final File vcfFile) {
final String path = vcfFile.getAbsolutePath();
if ( path.contains(".vcf") )
return new File(path.replace(".vcf", ".bcf"));
else
return new File( path + ".bcf" );
else {
final File bcf = new File( path + ".bcf" );
if ( bcf.canRead() )
return bcf;
else {
try {
// this is the only way to robustly decide if we could actually write to BCF
final FileOutputStream o = new FileOutputStream(bcf);
o.close();
bcf.delete();
return bcf;
} catch ( FileNotFoundException e ) {
return null;
} catch ( IOException e ) {
return null;
}
}
}
}
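A standalone sketch of the revised shadowBCF contract, under the assumption that callers treat null as "no BCF shadow possible" (for example when the VCF path is /dev/null and a sibling .bcf cannot be created):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

class ShadowBCFSketch {
    // Standalone sketch of the naming-plus-writability rule above.
    static File shadow(File vcfFile) {
        String path = vcfFile.getAbsolutePath();
        if (path.contains(".vcf"))
            return new File(path.replace(".vcf", ".bcf")); // foo.vcf -> foo.bcf
        File bcf = new File(path + ".bcf"); // foo.xxx -> foo.xxx.bcf
        if (bcf.canRead())
            return bcf;
        try {
            // probe writability by actually creating the file, then removing it
            new FileOutputStream(bcf).close();
            bcf.delete();
            return bcf;
        } catch (IOException e) {
            return null; // cannot write a sibling .bcf, e.g. under /dev
        }
    }

    public static void main(String[] args) {
        System.out.println(shadow(new File("calls.vcf"))); // .../calls.bcf
        System.out.println(shadow(new File("/dev/null"))); // null on most systems
    }
}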

@Ensures("BCF2Type.INTEGERS.contains(result)")

@@ -22,7 +22,6 @@ import java.util.zip.GZIPInputStream;

public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected static boolean doOnTheFlyModifications = true;

protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column

@@ -61,6 +60,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>

protected boolean warnedAboutNoEqualsForNonFlag = false;

/**
* If true, then we'll magically fix up VCF headers on the fly when we read them in
*/
protected boolean doOnTheFlyModifications = true;

protected AbstractVCFCodec() {
super(VariantContext.class);
}

@@ -850,7 +854,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
* of VCF records. Useful primarily for raw comparisons such as when comparing
* raw VCF records
*/
public static final void disableOnTheFlyModifications() {
public final void disableOnTheFlyModifications() {
doOnTheFlyModifications = false;
}
}

@@ -28,6 +28,8 @@ import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;

import java.util.Arrays;
import java.util.LinkedHashMap;

@@ -67,17 +69,30 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
return count;
}

// utility method
public int getCount(int numAltAlleles) {
int myCount;
/**
* Get the number of values expected for this header field, given the properties of VariantContext vc
*
* If the count is a fixed count, return that. For example, a field with size of 1 in the header returns 1.
* If the count is of type A, return vc.getNAlleles() - 1.
* If the count is of type G, return the expected number of genotypes given the number of alleles in VC and the
* max ploidy among all samples. Note that if the max ploidy of the VC is 0 (there's no GT information
* at all), then implicitly assume diploid samples when computing G values.
* If the count is UNBOUNDED return -1.
*
* @param vc the VariantContext whose alleles and ploidy determine the A and G counts
* @return the expected number of values, or -1 if the count is unbounded
*/
public int getCount(final VariantContext vc) {
switch ( countType ) {
case INTEGER: myCount = count; break;
case UNBOUNDED: myCount = -1; break;
case A: myCount = numAltAlleles; break;
case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
default: throw new ReviewedStingException("Unknown count type: " + countType);
case INTEGER: return count;
case UNBOUNDED: return -1;
case A: return vc.getNAlleles() - 1;
case G:
final int ploidy = vc.getMaxPloidy();
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
default:
throw new ReviewedStingException("Unknown count type: " + countType);
}
return myCount;
}
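The G case now delegates to GenotypeLikelihoods.numLikelihoods, which generalizes the old diploid-only formula: the number of unordered genotypes is C(nAlleles + ploidy - 1, ploidy), and ((numAltAlleles + 1) * (numAltAlleles + 2) / 2) is exactly the ploidy == 2 special case. A quick standalone check of that identity:

class GenotypeCountSketch {
    // C(nAlleles + ploidy - 1, ploidy): the number of unordered genotypes.
    static long numGenotypes(int nAlleles, int ploidy) {
        long n = nAlleles + ploidy - 1, result = 1;
        for (int k = 1; k <= ploidy; k++)
            result = result * (n - ploidy + k) / k; // exact at each step
        return result;
    }

    public static void main(String[] args) {
        System.out.println(numGenotypes(2, 2)); // 3  (AA, AB, BB)
        System.out.println(numGenotypes(3, 2)); // 6, matches (nAlt+1)*(nAlt+2)/2 with nAlt = 2
        System.out.println(numGenotypes(2, 3)); // 4  (triploid, biallelic)
    }
}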

public void setNumberToUnbounded() {

@@ -183,6 +183,7 @@ public class VCFStandardHeaderLines {
registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter"));

// INFO lines
registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));

@@ -25,7 +25,9 @@

package org.broadinstitute.sting.utils.collections;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**

@@ -83,4 +85,53 @@ public class NestedHashMap {

return value; // todo -- should never reach this point
}

public List<Object> getAllValues() {
List<Object> result = new ArrayList<Object>();
fillAllValues(data, result);
return result;
}

private void fillAllValues(final Map map, final List<Object> result) {
for ( Object value : map.values() ) {
if ( value == null )
continue;
if ( value instanceof Map )
fillAllValues((Map)value, result);
else
result.add(value);
}
}

public static class Leaf {
public final List<Object> keys;
public final Object value;

public Leaf(final List<Object> keys, final Object value) {
this.keys = keys;
this.value = value;
}
}

public List<Leaf> getAllLeaves() {
List<Leaf> result = new ArrayList<Leaf>();
List<Object> path = new ArrayList<Object>();
fillAllLeaves(data, path, result);
return result;
}

private void fillAllLeaves(final Map map, final List<Object> path, final List<Leaf> result) {
for ( final Object key : map.keySet() ) {
final Object value = map.get(key);
if ( value == null )
continue;
final List<Object> newPath = new ArrayList<Object>(path);
newPath.add(key);
if ( value instanceof Map ) {
fillAllLeaves((Map) value, newPath, result);
} else {
result.add(new Leaf(newPath, value));
}
}
}
}
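A short standalone sketch of what the new leaf traversal produces: every stored value paired with the full key path that reaches it, which is exactly what combine() above needs to re-key one report's data into another's tables. The nested map below mirrors how NestedHashMap stores its data internally; the contents are illustrative.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class NestedLeavesSketch {
    public static void main(String[] args) {
        // Two-level nesting: readgroup -> event type -> value.
        Map<Object, Object> inner = new HashMap<Object, Object>();
        inner.put("M", 42);
        inner.put("I", 7);
        Map<Object, Object> root = new HashMap<Object, Object>();
        root.put("rg1", inner);

        collectLeaves(root, new ArrayList<Object>()); // [rg1, M] -> 42, [rg1, I] -> 7
    }

    // Equivalent of fillAllLeaves: walk the tree, emitting (key path, value) pairs.
    @SuppressWarnings("unchecked")
    static void collectLeaves(Map<Object, Object> map, List<Object> path) {
        for (Map.Entry<Object, Object> e : map.entrySet()) {
            List<Object> newPath = new ArrayList<Object>(path);
            newPath.add(e.getKey());
            if (e.getValue() instanceof Map)
                collectLeaves((Map<Object, Object>) e.getValue(), newPath);
            else
                System.out.println(newPath + " -> " + e.getValue());
        }
    }
}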

@@ -6,6 +6,8 @@ import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

@@ -169,21 +171,23 @@ public class IntervalUtils {
*/
public static List<GenomeLoc> mergeListsBySetOperator(List<GenomeLoc> setOne, List<GenomeLoc> setTwo, IntervalSetRule rule) {
// shortcut, if either set is zero, return the other set
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0) return (setOne == null || setOne.size() == 0) ? setTwo : setOne;
if (setOne == null || setOne.size() == 0 || setTwo == null || setTwo.size() == 0)
return Collections.unmodifiableList((setOne == null || setOne.size() == 0) ? setTwo : setOne);

// our master list, since we can't guarantee removal time in a generic list
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();

// if we're set to UNION, just add them all
if (rule == IntervalSetRule.UNION) {
setOne.addAll(setTwo);
return setOne;
if (rule == null || rule == IntervalSetRule.UNION) {
retList.addAll(setOne);
retList.addAll(setTwo);
return Collections.unmodifiableList(retList);
}

// else we're INTERSECTION, create two indexes into the lists
int iOne = 0;
int iTwo = 0;

// our master list, since we can't guarantee removal time in a generic list
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();

// merge the second into the first using the rule
while (iTwo < setTwo.size() && iOne < setOne.size())
// if the first list is ahead, drop items off the second until we overlap

@@ -204,7 +208,7 @@ public class IntervalUtils {
throw new UserException.BadInput("The INTERSECTION of your -L options produced no intervals.");

// we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have
return retList;
return Collections.unmodifiableList(retList);
}
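Because the method can now return one of its inputs wrapped as an unmodifiable list, callers must copy before mutating; note how loadIntervals further below reassigns allIntervals rather than appending in place, and sortAndMergeIntervals copies before sorting. A hedged standalone sketch of the contract, using bare int ranges instead of GenomeLoc:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

class UnmodifiableMergeSketch {
    // UNION without mutating either input; the result is a read-only view.
    static List<int[]> union(List<int[]> a, List<int[]> b) {
        List<int[]> out = new LinkedList<int[]>();
        out.addAll(a);
        out.addAll(b);
        return Collections.unmodifiableList(out);
    }

    public static void main(String[] args) {
        List<int[]> merged = union(Arrays.asList(new int[]{1, 100}),
                                   Arrays.asList(new int[]{200, 300}));
        // merged.add(new int[]{400, 500}); // would throw UnsupportedOperationException
        List<int[]> mutable = new ArrayList<int[]>(merged); // copy first, then extend
        mutable.add(new int[]{400, 500});
        System.out.println(mutable.size()); // 3
    }
}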
|
||||
|
||||
/**
|
||||
|
|
@ -218,6 +222,8 @@ public class IntervalUtils {
|
|||
* @return A sorted, merged version of the intervals passed in.
|
||||
*/
|
||||
public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List<GenomeLoc> intervals, IntervalMergingRule mergingRule) {
|
||||
// Make a copy of the (potentially unmodifiable) list to be sorted
|
||||
intervals = new ArrayList<GenomeLoc>(intervals);
|
||||
// sort raw interval list
|
||||
Collections.sort(intervals);
|
||||
// now merge raw interval list
|
||||
|
|
@ -481,6 +487,70 @@ public class IntervalUtils {
|
|||
return new SplitLocusRecursive(split, remaining);
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup the intervals to be processed
|
||||
*/
|
||||
public static GenomeLocSortedSet parseIntervalBindings(
|
||||
final ReferenceDataSource referenceDataSource,
|
||||
final List<IntervalBinding<Feature>> intervals,
|
||||
final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
|
||||
final List<IntervalBinding<Feature>> excludeIntervals) {
|
||||
|
||||
Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = parseIntervalBindingsPair(
|
||||
referenceDataSource, intervals, intervalSetRule, intervalMergingRule, intervalPadding, excludeIntervals);
|
||||
|
||||
GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
|
||||
GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
|
||||
|
||||
if (excludeSortedSet != null) {
|
||||
return includeSortedSet.subtractRegions(excludeSortedSet);
|
||||
} else {
|
||||
return includeSortedSet;
|
||||
}
|
||||
}
|
||||
|
||||
public static Pair<GenomeLocSortedSet, GenomeLocSortedSet> parseIntervalBindingsPair(
|
||||
final ReferenceDataSource referenceDataSource,
|
||||
final List<IntervalBinding<Feature>> intervals,
|
||||
final IntervalSetRule intervalSetRule, final IntervalMergingRule intervalMergingRule, final int intervalPadding,
|
||||
final List<IntervalBinding<Feature>> excludeIntervals) {
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceDataSource.getReference());
|
||||
|
||||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = ((intervals == null || intervals.size() == 0) ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(intervals, intervalSetRule, intervalMergingRule, intervalPadding, genomeLocParser));
|
||||
|
||||
GenomeLocSortedSet excludeSortedSet = null;
|
||||
if (excludeIntervals != null && excludeIntervals.size() > 0) {
|
||||
excludeSortedSet = loadIntervals(excludeIntervals, IntervalSetRule.UNION, intervalMergingRule, 0, genomeLocParser);
|
||||
}
|
||||
return new Pair<GenomeLocSortedSet, GenomeLocSortedSet>(includeSortedSet, excludeSortedSet);
|
||||
}
|
||||
|
||||
public static GenomeLocSortedSet loadIntervals(
|
||||
final List<IntervalBinding<Feature>> intervalBindings,
|
||||
final IntervalSetRule rule, final IntervalMergingRule intervalMergingRule, final int padding,
|
||||
final GenomeLocParser genomeLocParser) {
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
for ( IntervalBinding intervalBinding : intervalBindings) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(genomeLocParser);
|
||||
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
if ( padding > 0 ) {
|
||||
intervals = getIntervalsWithFlanks(genomeLocParser, intervals, padding);
|
||||
}
|
||||
|
||||
allIntervals = mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
return sortAndMergeIntervals(genomeLocParser, allIntervals, intervalMergingRule);
|
||||
}

private final static class SplitLocusRecursive {
    final List<GenomeLoc> split;
    final LinkedList<GenomeLoc> remaining;

@@ -546,7 +616,7 @@ public class IntervalUtils {
 */
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
    if (raw.size() <= 1)
        return raw;
        return Collections.unmodifiableList(raw);
    else {
        ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
        Iterator<GenomeLoc> it = raw.iterator();

@@ -555,7 +625,7 @@ public class IntervalUtils {
        GenomeLoc curr = it.next();
        if (prev.overlapsP(curr)) {
            prev = prev.merge(curr);
        } else if (prev.contiguousP(curr) && rule == IntervalMergingRule.ALL) {
        } else if (prev.contiguousP(curr) && (rule == null || rule == IntervalMergingRule.ALL)) {
            prev = prev.merge(curr);
        } else {
            merged.add(prev);

@@ -563,7 +633,7 @@ public class IntervalUtils {
        }
    }
    merged.add(prev);
    return merged;
    return Collections.unmodifiableList(merged);
    }
}
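
To make the rule change concrete (coordinates illustrative; createGenomeLoc is the standard GenomeLocParser factory):

    // Two contiguous but non-overlapping intervals.
    final List<GenomeLoc> raw = Arrays.asList(
            genomeLocParser.createGenomeLoc("1", 1, 100),
            genomeLocParser.createGenomeLoc("1", 101, 200));

    // ALL (and, after this change, a null rule) merges abutting intervals: [1:1-200]
    final List<GenomeLoc> mergedAll = mergeIntervalLocations(raw, IntervalMergingRule.ALL);

    // OVERLAPPING_ONLY leaves them separate: [1:1-100, 1:101-200]
    final List<GenomeLoc> separate = mergeIntervalLocations(raw, IntervalMergingRule.OVERLAPPING_ONLY);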

@@ -28,10 +28,10 @@ package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

import java.io.File;
import java.util.*;

/**
 * Utility methods to facilitate on-the-fly base quality score recalibration.

@@ -45,39 +45,15 @@ public class BaseRecalibration {
private final ReadCovariates readCovariates;

private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private final KeysAndTables keysAndTables;
private final RecalibrationTables recalibrationTables;
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation

static class KeysAndTables {
private final Object[] tempKeySet;

    public enum Type {
        READ_GROUP_TABLE(0),
        QUALITY_SCORE_TABLE(1),
        OPTIONAL_COVARIATE_TABLE(2);

        private final int index;

        private Type(int index) {
            this.index = index;
        }
    }

    public final BQSRKeyManager[] managers = new BQSRKeyManager[Type.values().length];
    public final Map<Long, RecalDatum>[] tables = new Map[Type.values().length];

    public KeysAndTables(final Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
            Type type;
            if (mapEntry.getKey().getNumRequiredCovariates() == 1)
                type = Type.READ_GROUP_TABLE;
            else if (mapEntry.getKey().getNumOptionalCovariates() == 0)
                type = Type.QUALITY_SCORE_TABLE;
            else
                type = Type.OPTIONAL_COVARIATE_TABLE;
            managers[type.index] = mapEntry.getKey();
            tables[type.index] = mapEntry.getValue();
        }
    }
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
static {
    for (int i = 0; i < EventType.values().length; i++)
        qualityScoreByFullCovariateKey[i] = new NestedHashMap();
}

/**

@@ -89,7 +65,7 @@ public class BaseRecalibration {
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
    RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);

    keysAndTables = new KeysAndTables(recalibrationReport.getKeysAndTablesMap());
    recalibrationTables = recalibrationReport.getRecalibrationTables();
    requestedCovariates = recalibrationReport.getRequestedCovariates();
    quantizationInfo = recalibrationReport.getQuantizationInfo();
    if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores

@@ -98,20 +74,22 @@ public class BaseRecalibration {
        quantizationInfo.quantizeQualityScores(quantizationLevels);

    readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
    tempKeySet = new Integer[requestedCovariates.length];
}

/**
 * This constructor only exists for testing purposes.
 *
 * @param quantizationInfo the quantization info object
 * @param keysAndTablesMap the map of key managers and recalibration tables
 * @param recalibrationTables the recalibration tables
 * @param requestedCovariates the list of requested covariates
 */
protected BaseRecalibration(final QuantizationInfo quantizationInfo, final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, final Covariate[] requestedCovariates) {
protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
    this.quantizationInfo = quantizationInfo;
    keysAndTables = new KeysAndTables(keysAndTablesMap);
    this.recalibrationTables = recalibrationTables;
    this.requestedCovariates = requestedCovariates;
    readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
    tempKeySet = new Integer[requestedCovariates.length];
}

/**

@@ -125,13 +103,20 @@ public class BaseRecalibration {
    RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
    for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
        final byte[] quals = read.getBaseQualities(errorModel);
        final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model

        final int readLength = read.getReadLength();
        for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read

        for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
            final byte originalQualityScore = quals[offset];

            if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
                final long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
                final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
                final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
                Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(wrapKeySet(keySet));
                if (recalibratedQualityScore == null) {
                    recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
                    qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, keySet);
                }
                quals[offset] = recalibratedQualityScore;
            }
        }

@@ -139,7 +124,11 @@ public class BaseRecalibration {
    }
}

private Object[] wrapKeySet(final int[] keySet) {
    for (int i = 0; i < keySet.length; i++)
        tempKeySet[i] = keySet[i];
    return tempKeySet;
}
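
wrapKeySet boxes the primitive covariate key into the Object form the map lookup expects, reusing one preallocated tempKeySet buffer per instance. The caching pattern itself is plain compute-once-then-store; a minimal self-contained sketch of the same idea with a plain HashMap (names hypothetical, not the production structures):

    // Hypothetical sketch of the memoization used in the loop above.
    private final Map<List<Integer>, Byte> qualCache = new HashMap<List<Integer>, Byte>();

    private byte recalibrateCached(final int[] keySet, final EventType errorModel) {
        final List<Integer> boxedKey = new ArrayList<Integer>(keySet.length);
        for (final int k : keySet)
            boxedKey.add(k);                    // box the key once per lookup
        Byte cached = qualCache.get(boxedKey);
        if (cached == null) {                   // first time this covariate key is seen
            cached = performSequentialQualityCalculation(keySet, errorModel);
            qualCache.put(boxedKey, cached);
        }
        return cached;
    }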

/**
 * Implements a serial recalibration of the reads using the combinational table.

@@ -158,24 +147,23 @@ public class BaseRecalibration {
 * @param errorModel the event type
 * @return A recalibrated quality score as a byte
 */
protected byte performSequentialQualityCalculation(final long[] key, final EventType errorModel) {
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {

    final double globalDeltaQ = calculateGlobalDeltaQ(keysAndTables.managers[KeysAndTables.Type.READ_GROUP_TABLE.index], keysAndTables.tables[KeysAndTables.Type.READ_GROUP_TABLE.index], key, errorModel);
    final double deltaQReported = calculateDeltaQReported(keysAndTables.managers[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], key, errorModel, globalDeltaQ);
    final double deltaQCovariates = calculateDeltaQCovariates(keysAndTables.managers[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], key, errorModel, globalDeltaQ, deltaQReported);
    final byte qualFromRead = (byte)(long)key[1];
    final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
    final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
    final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE), key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);

    final byte qualFromRead = (byte)key[1];
    double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
    recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL

    return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
}
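
A worked instance of the formula (numbers illustrative): with qualFromRead = 30, globalDeltaQ = -1.2, deltaQReported = +0.5 and deltaQCovariates = -0.3,

    recalibratedQual = 30 + (-1.2) + 0.5 + (-0.3) = 29.0

which rounds to Q29, is bounded to the usable quality range, and is finally mapped through the quantization table.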

private double calculateGlobalDeltaQ(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel) {
private double calculateGlobalDeltaQ(final NestedHashMap table, final int[] key, final EventType errorModel) {
    double result = 0.0;

    final long masterKey = keyManager.createMasterKey(key, errorModel, -1);
    final RecalDatum empiricalQualRG = table.get(masterKey);
    final RecalDatum empiricalQualRG = (RecalDatum)table.get(key[0], errorModel.index);
    if (empiricalQualRG != null) {
        final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
        final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();

@@ -185,32 +173,28 @@ public class BaseRecalibration {
    return result;
}

private double calculateDeltaQReported(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel, final double globalDeltaQ) {
private double calculateDeltaQReported(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
    double result = 0.0;

    final long masterKey = keyManager.createMasterKey(key, errorModel, -1);
    final RecalDatum empiricalQualQS = table.get(masterKey);
    final RecalDatum empiricalQualQS = (RecalDatum)table.get(key[0], key[1], errorModel.index);
    if (empiricalQualQS != null) {
        final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
        final byte qualFromRead = (byte)key[1];
        result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
    }

    return result;
}

private double calculateDeltaQCovariates(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported) {
private double calculateDeltaQCovariates(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
    double result = 0.0;

    final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
    for (int i = 0; i < numOptionalCovariates; i++) {
        final long masterKey = keyManager.createMasterKey(key, errorModel, i);
        if (masterKey < 0)
    // for all optional covariates
    for (int i = 2; i < requestedCovariates.length; i++) {
        if (key[i] < 0)
            continue;
        final RecalDatum empiricalQualCO = table.get(masterKey);
        final RecalDatum empiricalQualCO = (RecalDatum)table.get(key[0], key[1], (i-2), key[i], errorModel.index);
        if (empiricalQualCO != null) {
            final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
            final byte qualFromRead = (byte)key[1];
            result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
        }
    }

@@ -0,0 +1,62 @@
/*
 * Copyright (c) 2012 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.utils.recalibration;

import org.broadinstitute.sting.utils.collections.NestedHashMap;

/**
 * Utility class to facilitate on-the-fly base quality score recalibration.
 *
 * User: ebanks
 * Date: 6/20/12
 */

public class RecalibrationTables {

    public enum TableType {
        READ_GROUP_TABLE(0),
        QUALITY_SCORE_TABLE(1),
        OPTIONAL_COVARIATE_TABLE(2);

        private final int index;

        private TableType(final int index) {
            this.index = index;
        }
    }

    private final NestedHashMap[] tables = new NestedHashMap[TableType.values().length];

    public RecalibrationTables(final NestedHashMap rgMap, final NestedHashMap qualMap, final NestedHashMap covMap) {
        tables[TableType.READ_GROUP_TABLE.index] = rgMap;
        tables[TableType.QUALITY_SCORE_TABLE.index] = qualMap;
        tables[TableType.OPTIONAL_COVARIATE_TABLE.index] = covMap;
    }

    public NestedHashMap getTable(final TableType type) {
        return tables[type.index];
    }
}
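
A short construction sketch of the new container (the maps here are empty placeholders; in the engine they come from the recalibration report). Lookup is now by table type rather than by key-manager shape, as in the removed KeysAndTables:

    final NestedHashMap rgMap = new NestedHashMap();
    final NestedHashMap qualMap = new NestedHashMap();
    final NestedHashMap covMap = new NestedHashMap();

    final RecalibrationTables tables = new RecalibrationTables(rgMap, qualMap, covMap);
    final NestedHashMap qs = tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);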

@@ -554,7 +554,7 @@ public abstract class Genotype implements Comparable<Genotype> {
        pairs.add(k + "=" + c.get(k));
    }

    return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
    return pairs.isEmpty() ? "" : " {" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}

/**

@@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.variantcontext;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;

@@ -34,6 +36,11 @@ import java.util.Arrays;
import java.util.EnumMap;

public class GenotypeLikelihoods {
    private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
    private final static int NUM_LIKELIHOODS_CACHE_PLOIDY = 10;
    // caching numAlleles up to 5 and ploidy up to 10
    private final static int[][] numLikelihoodCache = new int[NUM_LIKELIHOODS_CACHE_N_ALLELES][NUM_LIKELIHOODS_CACHE_PLOIDY];

    public final static int MAX_PL = Short.MAX_VALUE;

    //

@@ -44,6 +51,29 @@ public class GenotypeLikelihoods {
    private double[] log10Likelihoods = null;
    private String likelihoodsAsString_PLs = null;


    /**
     * initialize num likelihoods cache
     */
    static {
        // must be done before PLIndexToAlleleIndex
        for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) {
            for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) {
                numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy);
            }
        }
    }

    /**
     * The maximum number of alleles that we can represent as genotype likelihoods
     */
    public final static int MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED = 50;

    /*
     * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
     */
    private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);

    public final static GenotypeLikelihoods fromPLField(String PLs) {
        return new GenotypeLikelihoods(PLs);
    }

@@ -245,47 +275,11 @@ public class GenotypeLikelihoods {
        return likelihoodsAsVector;
    }

    // // -------------------------------------------------------------------------------------
    // //
    // // List interface functions
    // //
    // // -------------------------------------------------------------------------------------
    //
    // private final void notImplemented() {
    //     throw new ReviewedStingException("BUG: code not implemented");
    // }
    //
    // @Override public int size() { return getAsVector().length; }
    // @Override public Double get(final int i) { return getAsVector()[i];}
    // @Override public Double set(final int i, final Double aDouble) { return getAsVector()[i] = aDouble; }
    // @Override public boolean isEmpty() { return false; }
    // @Override public Iterator<Double> iterator() { return Arrays.asList(ArrayUtils.toObject(getAsVector())).iterator(); }
    // @Override public Object[] toArray() { return ArrayUtils.toObject(getAsVector()); }
    //
    // // none of these are implemented
    // @Override public boolean contains(final Object o) { notImplemented(); return false; }
    // @Override public <T> T[] toArray(final T[] ts) { notImplemented(); return null; }
    // @Override public boolean add(final Double aDouble) { notImplemented(); return false; }
    // @Override public boolean remove(final Object o) {notImplemented(); return false; }
    // @Override public boolean containsAll(final Collection<?> objects) { notImplemented(); return false; }
    // @Override public boolean addAll(final Collection<? extends Double> doubles) { notImplemented(); return false; }
    // @Override public boolean addAll(final int i, final Collection<? extends Double> doubles) { notImplemented(); return false; }
    // @Override public boolean removeAll(final Collection<?> objects) { notImplemented(); return false; }
    // @Override public boolean retainAll(final Collection<?> objects) { notImplemented(); return false; }
    // @Override public void clear() { notImplemented(); }
    // @Override public void add(final int i, final Double aDouble) { notImplemented(); }
    // @Override public Double remove(final int i) { notImplemented(); return null; }
    // @Override public int indexOf(final Object o) { notImplemented(); return -1; }
    // @Override public int lastIndexOf(final Object o) { notImplemented(); return 0; }
    // @Override public ListIterator<Double> listIterator() { notImplemented(); return null; }
    // @Override public ListIterator<Double> listIterator(final int i) { notImplemented(); return null; }
    // @Override public List<Double> subList(final int i, final int i1) { notImplemented(); return null; }

    // -------------------------------------------------------------------------------------
    //
    // Static conversion utilities, going from GL/PL index to allele index and vice versa.
    //
    // -------------------------------------------------------------------------------------
    // -------------------------------------------------------------------------------------
    //
    // Static conversion utilities, going from GL/PL index to allele index and vice versa.
    //
    // -------------------------------------------------------------------------------------

    /*
     * Class representing the 2 alleles (or rather their indexes into VariantContext.getAllele()) corresponding to a specific PL index.

@@ -300,18 +294,8 @@ public class GenotypeLikelihoods {
        }
    }

    /**
     * The maximum number of alleles that we can represent as genotype likelihoods
     */
    public final static int MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED = 50;

    /*
     * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
     */
    private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);

    private static GenotypeLikelihoodsAllelePair[] calculatePLcache(final int altAlleles) {
        final int numLikelihoods = calculateNumLikelihoods(1+altAlleles, 2);
        final int numLikelihoods = numLikelihoods(1 + altAlleles, 2);
        final GenotypeLikelihoodsAllelePair[] cache = new GenotypeLikelihoodsAllelePair[numLikelihoods];

        // for all possible combinations of 2 alleles

@@ -330,6 +314,32 @@ public class GenotypeLikelihoods {
        return cache;
    }

    // -------------------------------------------------------------------------------------
    //
    // num likelihoods given number of alleles and ploidy
    //
    // -------------------------------------------------------------------------------------

    /**
     * Actually does the computation in @see #numLikelihoods
     *
     * @param numAlleles the total number of alleles
     * @param ploidy the ploidy (number of chromosomes in the set)
     * @return the number of likelihood elements
     */
    private static final int calcNumLikelihoods(final int numAlleles, final int ploidy) {
        if (numAlleles == 1)
            return 1;
        else if (ploidy == 1)
            return numAlleles;
        else {
            int acc = 0;
            for (int k = 0; k <= ploidy; k++)
                acc += calcNumLikelihoods(numAlleles - 1, ploidy - k);
            return acc;
        }
    }

    /**
     * Compute how many likelihood elements are associated with the given number of alleles.
     * Equivalent to asking in how many ways N non-negative integers can add up to P; call this S(N,P).

@@ -344,6 +354,8 @@ public class GenotypeLikelihoods {
     * which is then, for the ordering above, (2,0,0), (1,1,0), (0,2,0), (1,0,1), (0,1,1), (0,0,2)
     * In general, for P=2 (regular diploid), S(N,2) = N*(N+1)/2
     *
     * Note this method caches the value for the most common numAlleles / ploidy combinations for efficiency
     *
     * Recursive implementation:
     *   S(N,P) = sum_{k=0}^P S(N-1,P-k)
     * because if we have N integers, we can condition one integer to be = k, and then the N-1 remaining integers have to sum to P-k

@@ -355,23 +367,16 @@ public class GenotypeLikelihoods {
     * @param ploidy Ploidy, or number of chromosomes in set
     * @return Number of likelihood elements we need to hold.
     */
    public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {

        // fast, closed form solution for diploid samples (most common use case)
        if (ploidy==2)
            return numAlleles*(numAlleles+1)/2;

        if (numAlleles == 1)
            return 1;
        else if (ploidy == 1)
            return numAlleles;

        int acc =0;
        for (int k=0; k <= ploidy; k++ )
            acc += calculateNumLikelihoods(numAlleles-1, ploidy-k);

        return acc;

    @Requires({"ploidy > 0", "numAlleles > 0"})
    @Ensures("result > 0")
    public static int numLikelihoods(final int numAlleles, final int ploidy) {
        if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES
                && ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY )
            return numLikelihoodCache[numAlleles][ploidy];
        else {
            // have to calculate on the fly
            return calcNumLikelihoods(numAlleles, ploidy);
        }
    }
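
Spot checks of the recursion, small enough to verify by hand:

    assert GenotypeLikelihoods.numLikelihoods(2, 2) == 3; // AA, AB, BB
    assert GenotypeLikelihoods.numLikelihoods(3, 2) == 6; // N*(N+1)/2 with N=3
    assert GenotypeLikelihoods.numLikelihoods(2, 3) == 4; // AAA, AAB, ABB, BBB

All three land in the precomputed cache; combinations with numAlleles >= 5 or ploidy >= 10 fall through to calcNumLikelihoods.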

// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.

@@ -61,6 +61,11 @@ public class GenotypesContext implements List<Genotype> {
 */
ArrayList<Genotype> notToBeDirectlyAccessedGenotypes;

/**
 * Cached value of the maximum ploidy observed among all samples
 */
private int maxPloidy = -1;

/** Are we allowing users to modify the list? */
boolean immutable = false;

@@ -408,6 +413,17 @@ public class GenotypesContext implements List<Genotype> {
    return getGenotypes().get(i);
}

@Ensures("result >= 0")
public int getMaxPloidy() {
    if ( maxPloidy == -1 ) {
        maxPloidy = 0; // necessary in the case where there are no genotypes
        for ( final Genotype g : getGenotypes() ) {
            maxPloidy = Math.max(g.getPloidy(), maxPloidy);
        }
    }
    return maxPloidy;
}
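
The -1 sentinel marks "not yet computed"; only the first call pays the scan over the genotypes. A sketch (vc hypothetical):

    final GenotypesContext gc = GenotypesContext.copy(vc.getGenotypes());
    final int first = gc.getMaxPloidy();  // walks the genotypes once and caches
    final int again = gc.getMaxPloidy();  // returns the cached value; first == again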

/**
 * Gets sample associated with this sampleName, or null if none is found
 *

@@ -626,14 +626,13 @@ public class VariantContext implements Feature { // to enable tribble integration

/**
 * Returns the maximum ploidy of all samples in this VC, or 0 if there are no genotypes
 *
 * This function caches its result, so it's only expensive on the first call
 *
 * @return the max ploidy, or 0 if there are no genotypes
 */
public int getMaxPloidy() {
    int max = -1;
    for ( final Genotype g : getGenotypes() ) {
        max = Math.max(g.getPloidy(), max);
    }
    return max;
    return genotypes.getMaxPloidy();
}

/**

@@ -1337,13 +1336,13 @@ public class VariantContext implements Feature { // to enable tribble integration
 * @param header containing types about all fields in this VC
 * @return a fully decoded version of this VC
 */
public VariantContext fullyDecode(final VCFHeader header) {
public VariantContext fullyDecode(final VCFHeader header, final boolean lenientDecoding) {
    if ( isFullyDecoded() )
        return this;
    else {
        // TODO -- warning this is potentially very expensive as it creates copies over and over
        final VariantContextBuilder builder = new VariantContextBuilder(this);
        fullyDecodeInfo(builder, header);
        fullyDecodeInfo(builder, header, lenientDecoding);
        fullyDecodeGenotypes(builder, header);
        builder.fullyDecoded(true);
        return builder.make();

@@ -1358,13 +1357,13 @@ public class VariantContext implements Feature { // to enable tribble integration
    return fullyDecoded;
}

private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) {
    builder.attributes(fullyDecodeAttributes(getAttributes(), header, false));
private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header, final boolean lenientDecoding) {
    builder.attributes(fullyDecodeAttributes(getAttributes(), header, lenientDecoding));
}

private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
                                                        final VCFHeader header,
                                                        final boolean allowMissingValuesComparedToHeader) {
                                                        final boolean lenientDecoding) {
    final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());

    for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {

@@ -1377,11 +1376,11 @@ public class VariantContext implements Feature { // to enable tribble integration
        final Object decoded = decodeValue(field, attr.getValue(), format);

        if ( decoded != null &&
                ! allowMissingValuesComparedToHeader
                ! lenientDecoding
                && format.getCountType() != VCFHeaderLineCount.UNBOUNDED
                && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
            final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
            final int expSize = format.getCount(this.getNAlleles() - 1);
            final int expSize = format.getCount(this);
            if ( obsSize != expSize ) {
                throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
                        field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +

@@ -1431,7 +1430,7 @@ public class VariantContext implements Feature { // to enable tribble integration
        switch ( format.getType() ) {
            case Character: return string;
            case Flag:
                final boolean b = Boolean.valueOf(string);
                final boolean b = Boolean.valueOf(string) || string.equals("1");
                if ( b == false )
                    throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values"
                            + " as seen at " + getChr() + ":" + getStart());

@@ -159,16 +159,20 @@ public class VariantContextBuilder {
    return this;
}

public VariantContextBuilder alleles(final String ... alleleStrings) {
    List<Allele> alleles = new ArrayList<Allele>(alleleStrings.length);
public VariantContextBuilder alleles(final List<String> alleleStrings) {
    List<Allele> alleles = new ArrayList<Allele>(alleleStrings.size());

    for ( int i = 0; i < alleleStrings.length; i++ ) {
        alleles.add(Allele.create(alleleStrings[i], i == 0));
    for ( int i = 0; i < alleleStrings.size(); i++ ) {
        alleles.add(Allele.create(alleleStrings.get(i), i == 0));
    }

    return alleles(alleles);
}

public VariantContextBuilder alleles(final String ... alleleStrings) {
    return alleles(Arrays.asList(alleleStrings));
}
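
Both overloads mark the first string as the reference allele; a quick sketch (builder assumed to be mid-construction):

    builder.alleles("A", "T");                  // varargs form
    builder.alleles(Arrays.asList("A", "T"));   // new List form, equivalent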

public List<Allele> getAlleles() {
    return new ArrayList<Allele>(alleles);
}

@@ -46,6 +46,7 @@ public class VariantContextUtils {
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

final public static JexlEngine engine = new JexlEngine();
public static final int DEFAULT_PLOIDY = 2;

@@ -57,6 +58,31 @@ public class VariantContextUtils {
    engine.setDebug(false);
}

/**
 * Ensures that VC contains all of the samples in allSamples by adding missing samples to
 * the resulting VC with default diploid ./. genotypes
 *
 * @param vc the variant context to augment
 * @param allSamples the complete set of sample names expected in the result
 * @return a VariantContext with a genotype for every sample in allSamples
 */
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
    // TODO -- what's the fastest way to do this calculation?
    final Set<String> missingSamples = new HashSet<String>(allSamples);
    missingSamples.removeAll(vc.getSampleNames());

    if ( missingSamples.isEmpty() )
        return vc;
    else {
        //logger.warn("Adding " + missingSamples.size() + " missing samples to called context");
        final GenotypesContext gc = GenotypesContext.copy(vc.getGenotypes());
        for ( final String missing : missingSamples ) {
            gc.add(new GenotypeBuilder(missing).alleles(DIPLOID_NO_CALL).make());
        }
        return new VariantContextBuilder(vc).genotypes(gc).make();
    }
}
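
For example (sample names illustrative), a context called only for NA12878 gains a diploid no-call for NA12891:

    final Set<String> allSamples = new HashSet<String>(Arrays.asList("NA12878", "NA12891"));
    final VariantContext complete = VariantContextUtils.addMissingSamples(vc, allSamples);
    // 'complete' now also carries a ./. genotype for NA12891.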

/**
 * Update the attributes of the attributes map given the VariantContext to reflect the
 * proper chromosome-based VCF tags

@@ -1199,8 +1225,8 @@ public class VariantContextUtils {
    altAlleleIndexToUse[i] = true;
}

// calculateNumLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, DEFAULT_PLOIDY);
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
    final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
    // consider this entry only if both of the alleles are good

@@ -185,7 +185,7 @@ public abstract class BCF2FieldEncoder {
@Requires("hasContextDeterminedNumElements()")
@Ensures("result >= 0")
public int numElements(final VariantContext vc) {
    return headerLine.getCount(vc.getNAlleles() - 1);
    return headerLine.getCount(vc);
}

/**

@@ -155,7 +155,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
public void add( VariantContext vc ) {
    if ( doNotWriteGenotypes )
        vc = new VariantContextBuilder(vc).noGenotypes().make();
    vc = vc.fullyDecode(header);
    vc = vc.fullyDecode(header, false);

    super.add(vc); // allow on the fly indexing

@@ -302,9 +302,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
    Genotype g = vc.getGenotype(name);
    if ( g == null )
        // we don't have any data about g at all
        g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make();
    if ( g == null ) VCFWriter.missingSampleError(vc, header);
    writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);

@@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;

@@ -339,23 +340,12 @@ class VCFWriter extends IndexingVariantContextWriter {
 */
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
        throws IOException {
    // if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
    //     final List<String> badSampleNames = new ArrayList<String>();
    //     for ( final Genotype g : vc.getGenotypes() )
    //         if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
    //             badSampleNames.add(g.getSampleName());
    //     throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
    // }

    for ( String sample : mHeader.getGenotypeSamples() ) {
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        Genotype g = vc.getGenotype(sample);
        if ( g == null ) {
            // TODO -- The VariantContext needs to know what the general ploidy is of the samples
            // TODO -- We shouldn't be assuming diploid genotypes here!
            mWriter.write(VCFConstants.EMPTY_GENOTYPE);
            continue;
            missingSampleError(vc, mHeader);
        }

        List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());

@@ -402,7 +392,7 @@ class VCFWriter extends IndexingVariantContextWriter {

VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
if ( metaData != null ) {
    int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
    int numInFormatField = metaData.getCount(vc);
    if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
        // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
        // For example, if Number=2, the string has to be ".,."

@@ -439,6 +429,13 @@ class VCFWriter extends IndexingVariantContextWriter {
    }
}

public static final void missingSampleError(final VariantContext vc, final VCFHeader header) {
    final List<String> badSampleNames = new ArrayList<String>();
    for ( final String x : header.getGenotypeSamples() )
        if ( ! vc.hasGenotype(x) ) badSampleNames.add(x);
    throw new ReviewedStingException("BUG: we now require all samples in VCFheader to have genotype objects. Missing samples are " + Utils.join(",", badSampleNames));
}

private boolean isMissingValue(String s) {
    // we need to deal with the case that it's a list of missing values
    return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length());

@@ -569,6 +566,6 @@ class VCFWriter extends IndexingVariantContextWriter {
        + " at " + vc.getChr() + ":" + vc.getStart()
        + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
        + " complete VCF headers by default. This error can be disabled with the engine argument"
        + " --allowMissingVCFHeaders");
        + " -U LENIENT_VCF_PROCESSING");
}
}

@@ -79,7 +79,7 @@ public class WalkerTest extends BaseTest {

public void validateOutputBCFIfPossible(final String name, final File resultFile) {
    final File bcfFile = BCF2Utils.shadowBCF(resultFile);
    if ( bcfFile.exists() ) {
    if ( bcfFile != null && bcfFile.exists() ) {
        logger.warn("Checking shadow BCF output file " + bcfFile + " against VCF file " + resultFile);
        try {
            VariantContextTestProvider.assertVCFandBCFFilesAreTheSame(resultFile, bcfFile);

@@ -24,32 +24,17 @@

package org.broadinstitute.sting.gatk;

import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.PrintReadsWalker;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;

import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;


/**
 * Tests selected functionality in the GenomeAnalysisEngine class

@@ -91,65 +76,4 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {

    testEngine.validateSuppliedIntervals();
}

@DataProvider(name="invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
    GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
    GATKArgumentCollection argCollection = new GATKArgumentCollection();
    testEngine.setArguments(argCollection);

    File fastaFile = new File("public/testdata/exampleFASTA.fasta");
    GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
    testEngine.setGenomeLocParser(genomeLocParser);

    return new Object[][] {
            new Object[] {testEngine, genomeLocParser, "chr1", 10000000, 20000000},
            new Object[] {testEngine, genomeLocParser, "chr2", 1, 2},
            new Object[] {testEngine, genomeLocParser, "chr1", -1, 50}
    };
}

@Test(dataProvider="invalidIntervalTestData")
public void testInvalidPicardIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {

    SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
    IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);

    List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
    intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));

    testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
}

@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
public void testInvalidGATKFileIntervalHandling(GenomeAnalysisEngine testEngine, GenomeLocParser genomeLocParser,
                                                String contig, int intervalStart, int intervalEnd ) throws Exception {

    File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
            String.format("%s:%d-%d", contig, intervalStart, intervalEnd));

    List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
    intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));

    testEngine.loadIntervals(intervalArgs, IntervalSetRule.UNION);
}

private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
    File tempFile = File.createTempFile(tempFilePrefix, tempFileExtension);
    tempFile.deleteOnExit();

    PrintWriter out = new PrintWriter(tempFile);
    for ( String line : lines ) {
        out.println(line);
    }
    out.close();

    return tempFile;
}
}

@@ -58,8 +58,8 @@ public class TestRMDTrackBuilder extends RMDTrackBuilder {
Index index;
try {
    // Create a feature reader that creates checkable tribble iterators.
    index = loadIndex(inputFile, codec);
    featureReader = new TestFeatureReader(inputFile.getAbsolutePath(), codec);
    index = loadFromDisk(inputFile, Tribble.indexFile(inputFile));
} catch (IOException e) {
    throw new RuntimeException(e);
}

@@ -16,7 +16,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
            Arrays.asList("55785745fe13ad81a2c4a14373d091f0"));
            Arrays.asList("360610e4990860bb5c45249b8ac31e5b"));
    executeTest("test file has annotations, not asking for annotations, #1", spec);
}

@@ -24,7 +24,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
            Arrays.asList("d6f749f8dbeb2d42c9effaff9fe571d7"));
            Arrays.asList("d69a3c92a0e8f44e09e7377e3eaed4e8"));
    executeTest("test file has annotations, not asking for annotations, #2", spec);
}

@@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
            Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e"));
            Arrays.asList("e0a08416249515ea18bd0663c90c9330"));
    executeTest("test file has annotations, asking for annotations, #1", spec);
}

@@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
            Arrays.asList("3dfabdcaa2648ac34380fb71860c42d3"));
            Arrays.asList("0b60da46ba0eabb3abe5e0288937f9b0"));
    executeTest("test file has annotations, asking for annotations, #2", spec);
}

@@ -48,7 +48,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
            Arrays.asList("b85c1ea28194484b327fbe0add1b5685"));
            Arrays.asList("540a9be8a8cb85b0f675fea1184bf78c"));
    executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}

@@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
    // they don't get reordered. It's a good test of the genotype ordering system.
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
            Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e"));
            Arrays.asList("f900e65b65ff0f9d9eb0891ef9b28c73"));
    executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}

@@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
            Arrays.asList("043fc6205b0633edcd3fadc9e044800c"));
            Arrays.asList("5eb576d0234c912d8efea184492691d0"));
    executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}

@@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
            Arrays.asList("6fafb42d374a67ba4687a23078a126af"));
            Arrays.asList("8860524d793d24b2e32f318433fcf527"));
    executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

@@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
            Arrays.asList("639462a0e0fa79e33def5f011fe55961"));
            Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f"));
    executeTest("test exclude annotations", spec);
}

@@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
            Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f"));
            Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
    executeTest("not passing it any reads", spec);
}

@@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
            Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6"));
            Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
    executeTest("getting DB tag with dbSNP", spec);
}

@@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
            Arrays.asList("ef95394c14d5c16682a322f3dfb9000c"));
            Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
    executeTest("adding multiple IDs with dbSNP", spec);
}

@@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
            Arrays.asList("e6e276b7d517d57626c8409589cd286f"));
            Arrays.asList("cc7184263975595a6e2473d153227146"));
    executeTest("getting DB tag with HM3", spec);
}

@@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoQuals() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
            Arrays.asList("a99e8315571ed1b6bce942451b3d8612"));
            Arrays.asList("aea983adc01cd059193538cc30adc17d"));
    executeTest("test file doesn't have QUALs", spec);
}

@@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
            Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb"));
            Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
    executeTest("using expression", spec);
}

@@ -146,13 +146,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
    WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
            Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157"));
            Arrays.asList("3de1d1998203518098ffae233f3e2352"));
    executeTest("using expression with ID", spec);
}

@Test
public void testTabixAnnotations() {
    final String MD5 = "5aebcf8f76c649d645708b1262185c80";
    final String MD5 = "99938d1e197b8f10c408cac490a00a62";
    for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + " -A HomopolymerRun --variant:vcf " + validationDataLocation + file + " -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf --no_cmdline_in_header", 1,

@@ -168,7 +168,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
                validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
                "snpEff2.0.5.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
            1,
            Arrays.asList("0c20cda1cf0b903a287f1807ae5bee02")
            Arrays.asList("d9291845ce5a8576898d293a829a05b7")
    );
    executeTest("Testing SnpEff annotations", spec);
}

@@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {

@Test
public void testTDTAnnotation() {
    final String MD5 = "81f85f0ce8cc36df7c717c478e100ba1";
    final String MD5 = "427dfdc665359b67eff210f909ebf8a2";
    WalkerTestSpec spec = new WalkerTestSpec(
            "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
            " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,

@@ -198,7 +198,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {

@Test
public void testChromosomeCountsPed() {
    final String MD5 = "9830fe2247651377e68ad0b0894e9a4e";
    final String MD5 = "6b5cbedf4a8b3385edf128d81c8a46f2";
    WalkerTestSpec spec = new WalkerTestSpec(
            "-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
            " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,

@@ -208,7 +208,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {

@Test
public void testInbreedingCoeffPed() {
    final String MD5 = "e94d589b5691e3ecfd9cc9475a384890";
    final String MD5 = "159a771c1deaeffb786097e106943893";
    WalkerTestSpec spec = new WalkerTestSpec(
            "-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
            " -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
@@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
"-o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
spec.disableShadowBCF();
executeTest("test BeagleOutputToVCF", spec);
}

@@ -51,7 +51,7 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput -R " + hg19Reference + " " +
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
"-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
"-o %s -U LENIENT_VCF_PROCESSING", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
spec.disableShadowBCF();
executeTest("test BeagleInput", spec);
}

@@ -61,7 +61,7 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -U LENIENT_VCF_PROCESSING -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
spec.disableShadowBCF();
executeTest("test BeagleInputWithBootstrap",spec);

@@ -75,7 +75,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
"-L 20:1-70000 -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
spec.disableShadowBCF();
executeTest("testBeagleChangesSitesToRef",spec);
}
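// Note on the recurring change above: the dedicated --allowMissingVCFHeaders
// switch is replaced throughout this commit by the generic unsafe-operations
// argument -U LENIENT_VCF_PROCESSING, and the expected MD5s stay the same, so
// only the argument spelling differs. Below is a minimal, self-contained
// sketch of the enum-gated check this style of flag implies; the names are
// hypothetical and this is not the actual GATK engine code.
import java.util.EnumSet;
import java.util.Set;

enum UnsafeOperation { LENIENT_VCF_PROCESSING, ALL }

final class HeaderLeniency {
    private final Set<UnsafeOperation> exclusions;

    HeaderLeniency(Set<UnsafeOperation> exclusions) {
        this.exclusions = exclusions;
    }

    // Lenient header handling is on if requested directly or via ALL.
    boolean allowMissingHeaderLines() {
        return exclusions.contains(UnsafeOperation.LENIENT_VCF_PROCESSING)
                || exclusions.contains(UnsafeOperation.ALL);
    }

    public static void main(String[] args) {
        HeaderLeniency lenient = new HeaderLeniency(EnumSet.of(UnsafeOperation.LENIENT_VCF_PROCESSING));
        System.out.println(lenient.allowMissingHeaderLines()); // prints true
    }
}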
@@ -1,158 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;

import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * @author Mauricio Carneiro
 * @since 3/7/12
 */
public class BQSRKeyManagerUnitTest {
RecalibrationArgumentCollection RAC;

@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
}

@Test(enabled = false)
public void testCombineBitSets() {
final int nRequired = 2;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>();
covariates.add(new ReadGroupCovariate());
covariates.add(new QualityScoreCovariate());
covariates.add(new CycleCovariate());
covariates.add(new ContextCovariate());
createReadAndTest(covariates, nRequired);
}

@Test(enabled = true)
public void testOnlyRequiredCovariates() {
final int nRequired = 2;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
covariates.add(new ReadGroupCovariate());
covariates.add(new QualityScoreCovariate());
createReadAndTest(covariates, nRequired);
}

@Test(enabled = true)
public void testOnlyOneCovariate() {
final int nRequired = 1;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
covariates.add(new ReadGroupCovariate());
createReadAndTest(covariates, nRequired);
}

@Test(enabled = false)
public void testOneCovariateWithOptionalCovariates() {
final int nRequired = 1;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
covariates.add(new ReadGroupCovariate());
covariates.add(new QualityScoreCovariate());
covariates.add(new CycleCovariate());
covariates.add(new ContextCovariate());
createReadAndTest(covariates, nRequired);
}

private void createReadAndTest(List<Covariate> covariates, int nRequired) {
int readLength = 1000;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");
read.setReadGroup(new GATKSAMReadGroupRecord("ID"));
read.getReadGroup().setPlatform("illumina");

runTestOnRead(read, covariates, nRequired);
read.setReadNegativeStrandFlag(true);
runTestOnRead(read, covariates, nRequired);
read.setReadPairedFlag(true);
read.setSecondOfPairFlag(true);
runTestOnRead(read, covariates, nRequired);
read.setReadNegativeStrandFlag(false);
runTestOnRead(read, covariates, nRequired);
}

private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()];
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size());
for (int i = 0; i < covariateList.size(); i++) {
final Covariate cov = covariateList.get(i);
cov.initialize(RAC);
readCovariates.setCovariateIndex(i);
cov.recordValues(read, readCovariates);
}
for (int i = 0; i < read.getReadLength(); i++) {
for (EventType eventType : EventType.values()) {
final long[] vals = readCovariates.getKeySet(i, eventType);
for (int j = 0; j < vals.length; j++)
covariateKeys[j][eventType.index][i] = vals[j];
}
}

List<Covariate> requiredCovariates = new LinkedList<Covariate>();
List<Covariate> optionalCovariates = new LinkedList<Covariate>();

for (int j=0; j<nRequired; j++)
requiredCovariates.add(covariateList.get(j));
for (int j=nRequired; j<covariateList.size(); j++)
optionalCovariates.add(covariateList.get(j));

BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);

for (int l = 0; l < read.getReadLength(); l++) {
for (EventType eventType : EventType.values()) {
long[] keySet = new long[covariateList.size()];
Object[] expectedRequired = new Object[covariateList.size()];
Object[] expectedCovariate = new Object[covariateList.size()];

for (int j = 0; j < covariateList.size(); j++) {
keySet[j] = covariateKeys[j][eventType.index][l];

if (j < nRequired)
expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]);
else
expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]);
}

if (optionalCovariates.size() == 0) {
final long masterKey = keyManager.createMasterKey(keySet, eventType, -1);
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, -1);
} else {
for (int j = 0; j < optionalCovariates.size(); j++) {
final long masterKey = keyManager.createMasterKey(keySet, eventType, j);
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, j);
}
}
}
}
}

private void testKeys(final BQSRKeyManager keyManager, final long key, final int nRequired, final List<Covariate> optionalCovariates,
final Object[] expectedRequired, final Object[] expectedCovariate, final EventType eventType, final int index) {

Object[] actual = keyManager.keySetFrom(key).toArray();

// Build the expected array
Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)];
System.arraycopy(expectedRequired, 0, expected, 0, nRequired);
if (optionalCovariates.size() > 0) {
expected[expected.length-3] = expectedCovariate[index];
expected[expected.length-2] = optionalCovariates.get(index).getClass().getSimpleName().split("Covariate")[0];
}
expected[expected.length-1] = eventType;

// System.out.println("Actual  : " + Utils.join(",", Arrays.asList(actual)));
// System.out.println("Expected: " + Utils.join(",", Arrays.asList(expected)));
// System.out.println();

for (int k = 0; k < expected.length; k++)
Assert.assertEquals(actual[k], expected[k]);
}
}
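// The file removed above was the unit test for BQSRKeyManager's packing of
// per-covariate keys plus an event type into a single long "master key".
// The sketch below illustrates the general bit-packing technique that test
// exercised; the field widths and method names are invented for the example
// and are not the real BQSRKeyManager layout.
final class KeyPackingSketch {
    // Pack four small non-negative keys into one long using fixed-width fields.
    static long pack(int readGroup, int qual, int optionalKey, int eventType) {
        return ((long) readGroup << 40) | ((long) qual << 24) | ((long) optionalKey << 8) | eventType;
    }

    // Recover one field by shifting back and masking to its 16-bit width.
    static int unpackQual(long masterKey) {
        return (int) ((masterKey >> 24) & 0xFFFF);
    }

    public static void main(String[] args) {
        long key = pack(3, 30, 17, 1);
        System.out.println(unpackQual(key)); // prints 30
    }
}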
@@ -36,7 +36,7 @@ public class ContextCovariateUnitTest {
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate);
}

public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
public static void verifyCovariateArray(int[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
for (int i = 0; i < values.length; i++)
Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize));

@@ -47,7 +47,7 @@ public class CycleCovariateUnitTest {
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1);
}

private void verifyCovariateArray(long[][] values, int init, int increment) {
private void verifyCovariateArray(int[][] values, int init, int increment) {
for (short i = 0; i < values.length; i++) {
short actual = Short.decode(covariate.formatKey(values[i][0]));
int expected = init + (increment * i);

@@ -46,8 +46,8 @@ public class ReadGroupCovariateUnitTest {

}

private void verifyCovariateArray(long[][] values, String expected) {
for (long[] value : values) {
private void verifyCovariateArray(int[][] values, String expected) {
for (int[] value : values) {
String actual = covariate.formatKey(value[0]);
Assert.assertEquals(actual, expected);
}
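// Common thread in the three covariate tests above: covariate keys shrink
// from long[][] to int[][]. A self-contained sketch of the narrowing
// invariant such a change relies on (illustrative helper, not GATK code):
final class KeyNarrowingSketch {
    // Narrow a long key to int, failing loudly if the key would not fit.
    static int toIntKey(long key) {
        if (key < 0 || key > Integer.MAX_VALUE)
            throw new IllegalArgumentException("covariate key does not fit in an int: " + key);
        return (int) key;
    }

    public static void main(String[] args) {
        System.out.println(toIntKey(123456L)); // prints 123456
    }
}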
@@ -1,7 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;

import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

@@ -32,7 +34,6 @@ public class RecalibrationReportUnitTest {

final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();

quantizationInfo.noQuantization();
final List<Covariate> requiredCovariates = new LinkedList<Covariate>();

@@ -41,14 +42,10 @@ public class RecalibrationReportUnitTest {
final ReadGroupCovariate rgCovariate = new ReadGroupCovariate();
rgCovariate.initialize(RAC);
requiredCovariates.add(rgCovariate);
final BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(rgKeyManager, new HashMap<Long, RecalDatum>());

final QualityScoreCovariate qsCovariate = new QualityScoreCovariate();
qsCovariate.initialize(RAC);
requiredCovariates.add(qsCovariate);
final BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(qsKeyManager, new HashMap<Long, RecalDatum>());

final ContextCovariate cxCovariate = new ContextCovariate();
cxCovariate.initialize(RAC);

@@ -56,8 +53,6 @@ public class RecalibrationReportUnitTest {
final CycleCovariate cyCovariate = new CycleCovariate();
cyCovariate.initialize(RAC);
optionalCovariates.add(cyCovariate);
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(cvKeyManager, new HashMap<Long, RecalDatum>());

final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
int covariateIndex = 0;

@@ -75,34 +70,35 @@ public class RecalibrationReportUnitTest {
readQuals[i] = 20;
read.setBaseQualities(readQuals);

final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
int nKeys = 0; // keep track of how many keys were produced
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
for (int offset = 0; offset < length; offset++) {
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = entry.getKey();
Map<Long, RecalDatum> table = entry.getValue();

final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
if (numOptionalCovariates == 0) {
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, -1), RecalDatum.createRandomRecalDatum(10000, 10));
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
nKeys += 3;
} else {
for (int j = 0; j < numOptionalCovariates; j++) {
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, j), RecalDatum.createRandomRecalDatum(10000, 10));
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, j), RecalDatum.createRandomRecalDatum(100000, 10));
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, j), RecalDatum.createRandomRecalDatum(100000, 10));
nKeys += 3;
}
final NestedHashMap rgTable = new NestedHashMap();
final NestedHashMap qualTable = new NestedHashMap();
final NestedHashMap covTable = new NestedHashMap();

for (int offset = 0; offset < length; offset++) {

for (EventType errorMode : EventType.values()) {

final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;

rgTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
nKeys += 2;
for (int j = 0; j < optionalCovariates.size(); j++) {
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[2 + j], errorMode.index);
nKeys++;
}
}
}
Assert.assertEquals(nKeys, expectedKeys);

RecalibrationReport report = new RecalibrationReport(quantizationInfo, keysAndTablesMap, RAC.generateReportTable(), RAC);
final RecalibrationTables recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);

final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);

File output = new File("RecalibrationReportUnitTestOutuput.grp");
PrintStream out;
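// The rewrite above replaces BQSRKeyManager-keyed HashMaps with three
// NestedHashMap tables (read group, quality score, optional covariates),
// each addressed by a variable-length key path with the value passed first.
// Below is a minimal sketch of that put() shape, assuming a map-of-maps
// structure; it mirrors only the varargs call pattern and is not the GATK class.
import java.util.HashMap;
import java.util.Map;

final class MiniNestedMap {
    private final Map<Object, Object> root = new HashMap<Object, Object>();

    // Walk or create intermediate maps for all but the last key, then store.
    @SuppressWarnings("unchecked")
    void put(Object value, Object... keys) {
        Map<Object, Object> node = root;
        for (int i = 0; i < keys.length - 1; i++) {
            Object next = node.get(keys[i]);
            if (next == null) {
                next = new HashMap<Object, Object>();
                node.put(keys[i], next);
            }
            node = (Map<Object, Object>) next;
        }
        node.put(keys[keys.length - 1], value);
    }

    public static void main(String[] args) {
        MiniNestedMap qualTable = new MiniNestedMap();
        qualTable.put("datum", 0, 20, 1); // readGroupKey, qualKey, eventIndex
        System.out.println(qualTable.root); // prints {0={20={1=datum}}}
    }
}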
@@ -36,18 +36,19 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
final String L = validationDataLocation + "DT-itest.interval_list";

private void DTTest(String testName, String args, String md5) {
String base = String.format("-T DiagnoseTargets -R %s -L %s", REF, L) + " -o %s ";
String base = String.format("-T DiagnoseTargets --no_cmdline_in_header -R %s -L %s", REF, L) + " -o %s ";
WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList(md5));
spec.disableShadowBCF();
executeTest(testName, spec);
}

@Test(enabled = true)
public void testSingleSample() {
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "2df47009571fe83ead779c94be97fe96");
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "ef71a569a48697c89e642cdda7bfb766");
}

@Test(enabled = true)
public void testMultiSample() {
DTTest("testMultiSample ", "-I " + multiSample, "6f0c070b9671e1d007ce6374c3183014");
DTTest("testMultiSample ", "-I " + multiSample, "1e6e15156e01e736274898fdac77d911");
}
}
@@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fbf88e25df30181ca5422a374c7b36fa"));
Arrays.asList("a890cd298298e22bc04a2e5a20b71170"));
executeTest("test no action", spec);
}

@@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("bb69f49e9ef0054f0ccd6d38f5ffa46a"));
Arrays.asList("f46b2fe2dbe6a423b5cfb10d74a4966d"));
executeTest("test clustered SNPs", spec);
}

@@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -maskName foo --mask " + privateTestDir + "vcfexample2.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7e3225a32fcd6066901247992b2c5ca8"));
Arrays.asList("86dbbf62a0623b2dc5e8969c26d8cb28"));
executeTest("test mask all", spec1);
}

@@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -maskName foo --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("3485fe95e3f0864c3575baf05cef4bcc"));
Arrays.asList("2fb33fccda1eafeea7a2f8f9219baa39"));
executeTest("test mask some", spec2);
}

@@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask3() {
WalkerTestSpec spec3 = new WalkerTestSpec(
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("367ab9c028a68e4eda2055e3bb8b486c"));
Arrays.asList("4351e00bd9d821e37cded5a86100c973"));
executeTest("test mask extend", spec3);
}

@@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5a10d969e50a58d8dfbf1da54bf293df"));
Arrays.asList("2f056b50a41c8e6ba7645ff4c777966d"));
executeTest("test filter #1", spec);
}

@@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("886dbbca2350083819ff67224f6efbd6"));
Arrays.asList("b2a8c1a5d99505be79c03120e9d75f2f"));
executeTest("test filter #2", spec);
}

@@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ee78c2e7128a8f9549233493c7cf6949"));
Arrays.asList("e350d9789bbdf334c1677506590d0798"));
executeTest("test filter with separate names #2", spec);
}

@@ -80,7 +80,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilters1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("285dd348c47c8c1e85d2886f9b33559e"));
Arrays.asList("c5ed9dd3975b3602293bb484b4fda5f4"));
executeTest("test genotype filter #1", spec1);
}

@@ -88,7 +88,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilters2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -G_filter 'isHomVar == 1' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a9c835a13eb72aa22d5e271894d8ac33"));
Arrays.asList("979ccdf484259117aa31305701075602"));
executeTest("test genotype filter #2", spec2);
}

@@ -96,7 +96,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testDeletions() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + privateTestDir + "twoDeletions.vcf", 1,
Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2"));
Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9"));
executeTest("test deletions", spec);
}
}
@@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","313cc749c7ee97713e4551de39e01ac5")
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b")
);
executeTest("testTrueNegativeMV", spec);
}

@@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","dd90dad9fd11e1b16e6660c3ca0553e7")
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb")
);
executeTest("testTruePositiveMV", spec);
}

@@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","b35a86d2cad17f0db7b5e84ddc0e5545")
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91")
);
executeTest("testFalsePositiveMV", spec);
}

@@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","c53b5fd377bef48e9c6035a94db398db")
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8")
);
executeTest("testSpecialCases", spec);
}

@@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","6f596470740e1a57679bbb38c0126364")
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07")
);
executeTest("testPriorOption", spec);
}

@@ -128,7 +128,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("b35a86d2cad17f0db7b5e84ddc0e5545")
Arrays.asList("9529e2bf214d72e792d93fbea22a3b91")
);
executeTest("testMVFileOption", spec);
}

@@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-fatherAlleleFirst"
),
2,
Arrays.asList("60ced3d078792a150a03640b62926857","6d550784382aa910f78b533d889c91c0")
Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492")
);
executeTest("testFatherAlleleFirst", spec);
}
@@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503",
1,
Arrays.asList("442c819569417c1b7d6be9f41ce05394"));
Arrays.asList("1c9a7fe4db41864cd85d16e5cf88986c"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
}

@@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503",
1,
Arrays.asList("2a51ee7d3c024f2410dcee40c5412993"));
Arrays.asList("a3ca151145379e0d4bae64a91165ea0b"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
}

@@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503",
1,
Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8"));
Arrays.asList("f685803333123a102ce1851d984cbd10"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
}

@@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503",
1,
Arrays.asList("96bb413a83c777ebbe622438e4565e8f"));
Arrays.asList("aaa7c25d118383639f273128d241e140"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
}

@@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503",
1,
Arrays.asList("7d2402f055d243e2208db9ea47973e13"));
Arrays.asList("418e29400762972e77bae4f73e16befe"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
}

@@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757",
1,
Arrays.asList("72682b3f27c33580d2d4515653ba6de7"));
Arrays.asList("4c8f6190ecc86766baba3aba08542991"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}
@@ -7,10 +7,6 @@ import org.testng.annotations.DataProvider;
import java.util.*;

public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
static HashMap<String, String> clusterFiles = new HashMap<String, String>();
static HashMap<String, String> tranchesFiles = new HashMap<String, String>();
static HashMap<String, String> inputVCFFiles = new HashMap<String, String>();

private static class VRTest {
String inVCF;
String tranchesMD5;

@@ -27,7 +23,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"b9709e4180e56abc691b208bd3e8626c", // recal file
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
"4c73ff0c8c5ae0055bfacf33329a2406"); // cut VCF

@DataProvider(name = "VRTest")
public Object[][] createData1() {

@@ -54,6 +50,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -recalFile %s" +
" -tranchesFile %s",
Arrays.asList(params.recalMD5, params.tranchesMD5));
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
}

@@ -65,17 +62,18 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -L 20:12,000,000-30,000,000" +
" --no_cmdline_in_header" +
" -input " + params.inVCF +
" -o %s" +
" -U LENIENT_VCF_PROCESSING -o %s" +
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
executeTest("testApplyRecalibration-"+params.inVCF, spec);
}

VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
"b9936d2432d3c85b2d8b5b7aa17d0950"); // cut VCF

@DataProvider(name = "VRIndelTest")
public Object[][] createData2() {

@@ -101,6 +99,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -recalFile %s" +
" -tranchesFile %s",
Arrays.asList(params.recalMD5, params.tranchesMD5));
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
executeTest("testVariantRecalibratorIndel-"+params.inVCF, spec).getFirst();
}

@@ -111,12 +110,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -T ApplyRecalibration" +
" -L 20:12,000,000-30,000,000" +
" -mode INDEL" +
" --no_cmdline_in_header" +
" -U LENIENT_VCF_PROCESSING --no_cmdline_in_header" +
" -input " + params.inVCF +
" -o %s" +
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
Arrays.asList(params.cutVCFMD5));
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
}
@@ -38,14 +38,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
//
// TODO WHEN THE HC EMITS VALID VCF HEADERS ENABLE BCF AND REMOVE allowMissingVCFHeaders ARGUMENTS
// TODO WHEN THE HC EMITS VALID VCF HEADERS ENABLE BCF AND REMOVE lenientVCFProcessing ARGUMENTS
//
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
//
private static String baseTestString(String args) {
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s --allowMissingVCFHeaders -R " + b36KGReference + args;
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
}

private void cvExecuteTest(final String name, final WalkerTestSpec spec) {

@@ -142,17 +142,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}

@Test public void complexTestFull() { combineComplexSites("", "8b19b54516b59de40992f0c4b328258a"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "a38dd097adc37420fe36ef8be14cfded"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "a3957dac9a617f50ce2668607e3baef0"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "a3957dac9a617f50ce2668607e3baef0"); }
@Test public void complexTestFull() { combineComplexSites("", "151a4970367dd3e73ba3e7f3c2f874f6"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "c0625e092b878b3d3eb1703c48e216b7"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "6978329d6a1033ac16f83b49072c679b"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "6978329d6a1033ac16f83b49072c679b"); }

@Test
public void combineDBSNPDuplicateSites() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList("3d2a5a43db86e3f6217ed2a63251285b"));
Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6"));
cvExecuteTest("combineDBSNPDuplicateSites:", spec);
}
}

@@ -39,9 +39,9 @@ public class CombineVariantsUnitTest {
"##fileformat=VCFv4.0\n"+
"##filedate=2010-06-21\n"+
"##reference=NCBI36\n"+
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
"##INFO=<ID=GC, Number=1, Type=String, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
"##INFO=<ID=AF, Number=1, Type=String, Description=\"Dindel estimated population allele frequency\">\n"+ // string to integer
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+ // string to integer
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
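// The header edits above resolve INFO lines whose declared Number and Type
// disagreed with how the field is used: GC carries a value, so it cannot stay
// a Flag (Flags must declare Number=0 and carry no value), and an allele
// frequency is numeric, so AF moves from String to Float. A sketch of the
// consistency rule being applied (hypothetical helper, not Tribble/GATK API):
final class InfoLineCheckSketch {
    static void checkInfoLine(String type, String number) {
        boolean isFlag = "Flag".equals(type);
        boolean zeroCount = "0".equals(number);
        if (isFlag != zeroCount)
            throw new IllegalArgumentException(
                    "INFO Type=" + type + " is inconsistent with Number=" + number);
    }

    public static void main(String[] args) {
        checkInfoLine("Flag", "0");   // fine
        checkInfoLine("String", "1"); // fine
        checkInfoLine("Flag", "1");   // throws: a Flag carries no value
    }
}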
@@ -40,7 +40,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("a139480c004859452d4095fe4859b42e"));
Arrays.asList("7d5f91fcf419211ae9eca6c66dcec0e6"));
executeTest("test b36 to hg19", spec);
}

@@ -49,7 +49,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("91344768f1e98c979364ec0d5d3aa9d6"));
Arrays.asList("29dab3555e7f1ee6a60e267b00215a11"));
executeTest("test b36 to hg19, unsorted samples", spec);
}

@@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + privateTestDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("e0b813ff873185ab51995a151f80ec98"));
Arrays.asList("7e7bad0e1890753a01303c09a38ceb8d"));
executeTest("test hg18 to hg19, unsorted", spec);
}
}
@@ -18,7 +18,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant "
+ b37hapmapGenotypes + " -disc " + testFile
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders --allowMissingVCFHeaders",
+ " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
1,
Arrays.asList("d88bdae45ae0e74e8d8fd196627e612c")
);

@@ -34,7 +34,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1,
Arrays.asList("337bb7fc23153cf67acc42a466834775")
Arrays.asList("3d98a024bf3aecbd282843e0af89d0e6")
);

executeTest("testRepeatedLineSelection--" + testfile, spec);

@@ -47,7 +47,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant "
+ b37hapmapGenotypes + " -disc " + testFile
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
+ " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
1,
Arrays.asList("54289033d35d32b8ebbb38c51fbb614c")
);

@@ -64,7 +64,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
);
spec.disableShadowBCF();
executeTest("testComplexSelection--" + testfile, spec);

@@ -78,7 +78,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("bc0e00d0629b2bc6799e7e9db0dc775c")
Arrays.asList("1f5c72951a35667c4bdf1be153787e27")
);
spec.disableShadowBCF();

@@ -93,7 +93,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc "
+ b37hapmapGenotypes + " --variant " + testFile
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
+ " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",
1,
Arrays.asList("946e7f2e0ae08dc0e931c1634360fc46")
);

@@ -109,7 +109,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("a111642779b05de33ad04073d6022c21")
Arrays.asList("ca2b70e3171420b08b0a2659bfe2a794")
);

executeTest("testVariantTypeSelection--" + testFile, spec);

@@ -161,7 +161,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("a0b7f77edc16df0992d2c1363136a17e")
Arrays.asList("ef3c5f75074a5dd2b2cd2715856a2542")
);

executeTest("testNoGTs--" + testFile, spec);

@@ -176,7 +176,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
);
spec.disableShadowBCF();
executeTest("testParallelization (2 threads)--" + testfile, spec);

@@ -190,7 +190,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
);
spec.disableShadowBCF();

@@ -204,7 +204,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
1,
Arrays.asList("9acd6effcc78bfb832bed5edfd6a1b5b")
Arrays.asList("3ab35d5e81a29fb5db3e2add11c7e823")
);
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
}

@@ -223,7 +223,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
final String testFile = privateTestDir + "missingHeaderLine.vcf";
final String cmd = "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp "
+ testFile + " -o %s --no_cmdline_in_header"
+ (expectedException == null ? " -allowMissingVCFHeaders" : "");
+ (expectedException == null ? " -U LENIENT_VCF_PROCESSING" : "");
WalkerTestSpec spec =
expectedException != null
? new WalkerTestSpec(cmd, 1, expectedException)
@@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" --no_cmdline_in_header " +
" -o %s",
1,
Arrays.asList("2cdcd9e140eb1b6da7e365e37dd7d859")
Arrays.asList("283f434b3efbebb8e10ed6347f97d104")
);

executeTest("testSimpleVCFStreaming", spec);
@@ -89,7 +89,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("95898aad8c9f9515c0e668e2fb65a024");
md5.add("21084d32ce7ac5df3cee1730bfaaf71c");

WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@@ -1,8 +1,6 @@
package org.broadinstitute.sting.utils;

import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.bqsr.BQSRKeyManager;
import org.broadinstitute.sting.gatk.walkers.bqsr.ContextCovariate;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

@@ -59,19 +57,4 @@ public class BitSetUtilsUnitTest {
//for (String d : dna)
//    Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
}

@Test(enabled = true)
public void testNumberOfBitsToRepresent() {
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bits to be represented

for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number
long n1 = 1L << i;
long n2 = Math.abs(random.nextLong()) % n1;
long n3 = n1 | n2;
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n1), i);
}
}
}
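// The deleted test above pins down the contract of numberOfBitsToRepresent:
// 0 -> 0, 1 -> 1, 3 -> 2, exactly i bits for 2^i elements, and i+1 bits once
// any extra high bit is set. A sketch consistent with those assertions,
// reconstructed from the test rather than copied from the BQSRKeyManager source:
final class BitWidthSketch {
    static int numberOfBitsToRepresent(long n) {
        if (n <= 1)
            return (int) n; // 0 elements -> 0 bits, 1 element -> 1 bit, as asserted
        return 64 - Long.numberOfLeadingZeros(n - 1); // ceil(log2(n)) for n >= 2
    }

    public static void main(String[] args) {
        System.out.println(numberOfBitsToRepresent(3));  // 2
        System.out.println(numberOfBitsToRepresent(16)); // 4
        System.out.println(numberOfBitsToRepresent(17)); // 5
    }
}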
@@ -480,7 +480,8 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
final byte typeDescriptor = decoder.readTypeDescriptor();

// read the int[] with the low-level version
final int[] decoded = decoder.decodeIntArray(typeDescriptor);
final int size = decoder.decodeNumberOfElements(typeDescriptor);
final int[] decoded = decoder.decodeIntArray(typeDescriptor, size);

if ( isMissing(ints) ) {
// we expect that the result is null in this case
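// Context for the API split above: a BCF2 typing byte packs the element count
// into its high nibble (with 15 meaning the real count follows as a separate
// typed integer), so the count can be decoded once and passed explicitly to
// the array decoder. Below is a sketch of that nibble layout per the BCF2
// spec; the method names are illustrative, not the GATK decoder's API.
final class Bcf2TypeDescriptorSketch {
    static int inlineElementCount(byte typeDescriptor) {
        return (typeDescriptor >> 4) & 0x0F; // 15 => overflow, size encoded separately
    }

    static int typeID(byte typeDescriptor) {
        return typeDescriptor & 0x0F;
    }

    public static void main(String[] args) {
        byte td = (byte) ((3 << 4) | 1); // 3 elements of type 1 (8-bit int)
        System.out.println(inlineElementCount(td)); // 3
        System.out.println(typeID(td));             // 1
    }
}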
@@ -33,13 +33,13 @@ public class VCFHeaderUnitTest extends BaseTest {
@Test
public void testVCF4ToVCF4() {
VCFHeader header = createHeader(VCF4headerStrings);
checkMD5ofHeaderFile(header, "47d32e7901650ba69ed41322af638806");
checkMD5ofHeaderFile(header, "f05a57053a0c6a5bac15dba566f7f7ff");
}

@Test
public void testVCF4ToVCF4_alternate() {
VCFHeader header = createHeader(VCF4headerStrings_with_negativeOne);
checkMD5ofHeaderFile(header, "954e9dd756d5f952cfb401a4db6bd145");
checkMD5ofHeaderFile(header, "b1d71cc94261053131f8d239d65a8c9f");
}

/**

@@ -112,7 +112,7 @@ public class VCFHeaderUnitTest extends BaseTest {
"##reference=NCBI36\n"+
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
"##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n"+
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n"+
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n"+

@@ -132,7 +132,7 @@ public class VCFHeaderUnitTest extends BaseTest {
"##reference=NCBI36\n"+
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
"##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">\n"+
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
"##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n"+
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n"+
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n"+
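// The AF lines above move from Number=1 to Number=A, the VCF 4.1 way of
// declaring one value per alternate allele rather than a fixed count of one.
// A sketch of what that count declaration means for a record (hypothetical
// helper for illustration, not the parser's actual API):
final class InfoCountSketch {
    // Returns the expected number of values, or -1 when unbounded (".").
    static int expectedCount(String number, int nAltAlleles) {
        if ("A".equals(number)) return nAltAlleles; // one per alternate allele
        if (".".equals(number)) return -1;          // unspecified / variable
        return Integer.parseInt(number);            // fixed count
    }

    public static void main(String[] args) {
        System.out.println(expectedCount("A", 2)); // 2: one per alt allele
        System.out.println(expectedCount("1", 2)); // 1: fixed single value
    }
}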
@@ -12,7 +12,7 @@ public class VCFIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testReadingAndWritingWitHNoChanges() {

String md5ofInputVCF = "babf02baabcfa7f72a2c6f7da5fdc996";
String md5ofInputVCF = "d991abe6c6a7a778a60a667717903be0";
String testVCF = privateTestDir + "vcf4.1.example.vcf";

String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";

@@ -30,12 +30,11 @@ public class VCFIntegrationTest extends WalkerTest {
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
public void testReadingAndWritingBreakpointAlleles() {
String testVCF = privateTestDir + "breakpoint-example.vcf";
//String testVCF = validationDataLocation + "multiallelic.vcf";

String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";

String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("13329ba7360a8beb3afc02569e5a20c4"));
executeTest("Test reading and writing breakpoint VCF", spec1);
}

@@ -51,11 +50,20 @@ public class VCFIntegrationTest extends WalkerTest {
}

@Test
public void testReadingAndWritingSamtoolsWExBCFExample() {
public void testWritingSamtoolsWExBCFExample() {
String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
executeTest("Test reading and writing samtools WEx vcf/BCF example", spec1);
executeTest("Test writing samtools WEx BCF example", spec1);
}

@Test
public void testReadingSamtoolsWExBCFExample() {
String testVCF = privateTestDir + "ex2.bcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94"));
executeTest("Test reading samtools WEx BCF example", spec1);
}
}
@@ -1,12 +1,16 @@
package org.broadinstitute.sting.utils.interval;

import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.io.FileUtils;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

@@ -45,7 +49,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for (String interval: intervals)
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
return locs;
return Collections.unmodifiableList(locs);
}

@BeforeClass

@@ -277,7 +281,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
}

List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 100);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, null);
Assert.assertEquals(ret.size(), 100);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 0);

@@ -296,7 +303,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
}

List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 150);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
Assert.assertEquals(ret.size(), 150);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 50);

@@ -316,7 +326,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
}
}

List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
List<GenomeLoc> ret;
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
Assert.assertEquals(ret.size(), 40);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, null);
Assert.assertEquals(ret.size(), 40);
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
Assert.assertEquals(ret.size(), 20);

@@ -761,7 +774,13 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(privateTestDir + unmergedIntervals));
Assert.assertEquals(locs.size(), 2);

List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
List<GenomeLoc> merged;

merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
Assert.assertEquals(merged.size(), 1);

// Test that null means the same as ALL
merged = IntervalUtils.mergeIntervalLocations(locs, null);
Assert.assertEquals(merged.size(), 1);
}
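// The new assertions above pin down the null-defaulting behavior: a null
// IntervalSetRule merges like UNION and a null IntervalMergingRule merges
// like ALL. Below is a sketch of the defaulting the tests imply (illustrative,
// with local stand-in enums; not the IntervalUtils bodies themselves):
final class RuleDefaultsSketch {
    enum IntervalSetRule { UNION, INTERSECTION }
    enum IntervalMergingRule { ALL, OVERLAPPING_ONLY }

    static IntervalSetRule orDefault(IntervalSetRule rule) {
        return rule == null ? IntervalSetRule.UNION : rule;
    }

    static IntervalMergingRule orDefault(IntervalMergingRule rule) {
        return rule == null ? IntervalMergingRule.ALL : rule;
    }

    public static void main(String[] args) {
        System.out.println(orDefault((IntervalSetRule) null));     // UNION
        System.out.println(orDefault((IntervalMergingRule) null)); // ALL
    }
}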
@@ -993,6 +1012,74 @@

// Attempting to use the legacy -L "interval1;interval2" syntax should produce an exception:
IntervalBinding<Feature> binding = new IntervalBinding<Feature>("1;2");
List<GenomeLoc> intervals = binding.getIntervals(toolkit);
binding.getIntervals(toolkit);
}

@DataProvider(name="invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
GATKArgumentCollection argCollection = new GATKArgumentCollection();
File fastaFile = new File("public/testdata/exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));

return new Object[][] {
new Object[] {argCollection, genomeLocParser, "chr1", 10000000, 20000000},
new Object[] {argCollection, genomeLocParser, "chr2", 1, 2},
new Object[] {argCollection, genomeLocParser, "chr1", -1, 50}
};
}

@Test(dataProvider="invalidIntervalTestData")
public void testInvalidPicardIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
String contig, int intervalStart, int intervalEnd ) throws Exception {

SAMFileHeader picardFileHeader = new SAMFileHeader();
picardFileHeader.addSequence(genomeLocParser.getContigInfo("chr1"));
IntervalList picardIntervals = new IntervalList(picardFileHeader);
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

File picardIntervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
picardIntervals.write(picardIntervalFile);

List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));

IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
}

@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
public void testInvalidGATKFileIntervalHandling(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser,
String contig, int intervalStart, int intervalEnd ) throws Exception {

File gatkIntervalFile = createTempFile("testInvalidGATKFileIntervalHandling", ".intervals",
String.format("%s:%d-%d", contig, intervalStart, intervalEnd));

List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));

IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
}

private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
File tempFile = BaseTest.createTempFile(tempFilePrefix, tempFileExtension);
FileUtils.writeLines(tempFile, Arrays.asList(lines));
return tempFile;
}

@DataProvider(name = "sortAndMergeIntervals")
public Object[][] getSortAndMergeIntervals() {
return new Object[][] {
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1", "chr1:2", "chr1:3") },
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr1:2"), getLocs("chr1:1-3") },
|
||||
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
|
||||
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1:3", "chr2:2"), getLocs("chr1:1", "chr1:3", "chr2:2") },
|
||||
new Object[] { IntervalMergingRule.OVERLAPPING_ONLY, getLocs("chr1:1", "chr1"), getLocs("chr1") },
|
||||
new Object[] { IntervalMergingRule.ALL, getLocs("chr1:1", "chr1"), getLocs("chr1") }
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "sortAndMergeIntervals")
|
||||
public void testSortAndMergeIntervals(IntervalMergingRule merge, List<GenomeLoc> unsorted, List<GenomeLoc> expected) {
|
||||
List<GenomeLoc> sorted = IntervalUtils.sortAndMergeIntervals(hg18GenomeLocParser, unsorted, merge).toList();
|
||||
Assert.assertEquals(sorted, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
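The hunks above pin down a fallback contract: a null IntervalSetRule behaves like UNION, and a null IntervalMergingRule behaves like ALL. A minimal caller-side sketch of that contract in Java; listA and listB are assumed inputs built with the same GenomeLocParser the tests use:

    // Sketch only: IntervalUtils, IntervalSetRule and GenomeLoc are the classes
    // exercised in the tests above; listA and listB are hypothetical inputs.
    List<GenomeLoc> union     = IntervalUtils.mergeListsBySetOperator(listA, listB, IntervalSetRule.UNION);
    List<GenomeLoc> defaulted = IntervalUtils.mergeListsBySetOperator(listA, listB, null);
    Assert.assertEquals(defaulted.size(), union.size()); // null set rule falls back to UNION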
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.recalibration;

import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

@@ -22,7 +21,7 @@ import java.util.*;
public class BaseRecalibrationUnitTest {

    private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
    private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap;
    private RecalibrationTables recalibrationTables;

    private ReadGroupCovariate rgCovariate;
    private QualityScoreCovariate qsCovariate;

@@ -50,19 +49,14 @@ public class BaseRecalibrationUnitTest {
        List<Covariate> optionalCovariates = new ArrayList<Covariate>();

        dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4);
        keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();

        rgCovariate = new ReadGroupCovariate();
        rgCovariate.initialize(RAC);
        requiredCovariates.add(rgCovariate);
        BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
        keysAndTablesMap.put(rgKeyManager, new HashMap<Long, RecalDatum>());

        qsCovariate = new QualityScoreCovariate();
        qsCovariate.initialize(RAC);
        requiredCovariates.add(qsCovariate);
        BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
        keysAndTablesMap.put(qsKeyManager, new HashMap<Long, RecalDatum>());

        cxCovariate = new ContextCovariate();
        cxCovariate.initialize(RAC);

@@ -70,8 +64,6 @@ public class BaseRecalibrationUnitTest {
        cyCovariate = new CycleCovariate();
        cyCovariate.initialize(RAC);
        optionalCovariates.add(cyCovariate);
        BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
        keysAndTablesMap.put(cvKeyManager, new HashMap<Long, RecalDatum>());

        final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
        int covariateIndex = 0;

@@ -82,10 +74,13 @@ public class BaseRecalibrationUnitTest {

        readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);

        for (int i=0; i<read.getReadLength(); i++) {
            long[] bitKeys = readCovariates.getMismatchesKeySet(i);
        final NestedHashMap rgTable = new NestedHashMap();
        final NestedHashMap qualTable = new NestedHashMap();
        final NestedHashMap covTable = new NestedHashMap();

            Object[] objKey = buildObjectKey(bitKeys);
        for (int i=0; i<read.getReadLength(); i++) {
            final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
            final Object[] objKey = buildObjectKey(bitKeys);

            Random random = new Random();
            int nObservations = random.nextInt(10000);

@@ -97,21 +92,17 @@ public class BaseRecalibrationUnitTest {
            dataManager.addToAllTables(objKey, oldDatum, QualityUtils.MIN_USABLE_Q_SCORE);

            RecalDatum newDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
            for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
                final BQSRKeyManager keyManager = mapEntry.getKey();
                final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
                if (numOptionalCovariates == 0) {
                    final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, -1);
                    updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
                } else {
                    for (int j = 0; j < numOptionalCovariates; j++) {
                        final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, j);
                        updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
                    }
                }

            rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
            qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
            for (int j = 0; j < optionalCovariates.size(); j++) {
                covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[2 + j], EventType.BASE_SUBSTITUTION.index);
            }
        }
        dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);

        recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);

        dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);

        List<Byte> quantizedQuals = new ArrayList<Byte>();
        List<Long> qualCounts = new ArrayList<Long>();

@@ -121,16 +112,15 @@ public class BaseRecalibrationUnitTest {
        }
        QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts);
        quantizationInfo.noQuantization();
        baseRecalibration = new BaseRecalibration(quantizationInfo, keysAndTablesMap, requestedCovariates);
        baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates);
    }

    @Test(enabled=false)
    public void testGoldStandardComparison() {
        debugTables();
        for (int i = 0; i < read.getReadLength(); i++) {
            long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
            int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
            Object [] objKey = buildObjectKey(bitKey);
            byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION);
            byte v1 = goldStandardSequentialCalculation(objKey);

@@ -138,7 +128,7 @@ public class BaseRecalibrationUnitTest {
        }
    }

    private Object[] buildObjectKey(long[] bitKey) {
    private Object[] buildObjectKey(final int[] bitKey) {
        Object[] key = new Object[bitKey.length];
        key[0] = rgCovariate.formatKey(bitKey[0]);
        key[1] = qsCovariate.formatKey(bitKey[1]);

@@ -147,49 +137,6 @@ public class BaseRecalibrationUnitTest {
        return key;
    }

    private void debugTables() {
        System.out.println("\nV1 Table\n");
        System.out.println("ReadGroup Table:");
        NestedHashMap nestedTable = dataManager.getCollapsedTable(0);
        printNestedHashMap(nestedTable.data, "");
        System.out.println("\nQualityScore Table:");
        nestedTable = dataManager.getCollapsedTable(1);
        printNestedHashMap(nestedTable.data, "");
        System.out.println("\nCovariates Table:");
        nestedTable = dataManager.getCollapsedTable(2);
        printNestedHashMap(nestedTable.data, "");
        nestedTable = dataManager.getCollapsedTable(3);
        printNestedHashMap(nestedTable.data, "");

        int i = 0;
        System.out.println("\nV2 Table\n");
        for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
            BQSRKeyManager keyManager = mapEntry.getKey();
            Map<Long, RecalDatum> table = mapEntry.getValue();
            switch(i++) {
                case 0 :
                    System.out.println("ReadGroup Table:");
                    break;
                case 1 :
                    System.out.println("QualityScore Table:");
                    break;
                case 2 :
                    System.out.println("Covariates Table:");
                    break;
            }
            for (Map.Entry<Long, RecalDatum> entry : table.entrySet()) {
                Long key = entry.getKey();
                RecalDatum datum = entry.getValue();
                List<Object> keySet = keyManager.keySetFrom(key);
                System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported());
            }
            System.out.println();
        }

    }

    private static void printNestedHashMap(Map table, String output) {
        for (Object key : table.keySet()) {
            String ret;
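The rewrite above drops the BQSRKeyManager/long master-key plumbing in favor of RecalibrationTables backed by NestedHashMap, keyed per base by int[] covariate keys. Condensed into a Java sketch (all names come from the diff; the loop index i and the covariate fields are assumed to be in scope as in the setup code):

    // Per-base covariate keys become the positional keys of the nested tables.
    final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
    rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);               // read group only
    qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index); // plus reported quality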
@@ -100,10 +100,10 @@ public class GenotypeLikelihoodsUnitTest {

        for (int nAlleles=2; nAlleles<=5; nAlleles++)
            // simplest case: diploid
            Assert.assertEquals(GenotypeLikelihoods.calculateNumLikelihoods(nAlleles, 2), nAlleles*(nAlleles+1)/2);
            Assert.assertEquals(GenotypeLikelihoods.numLikelihoods(nAlleles, 2), nAlleles*(nAlleles+1)/2);

        // some special cases: ploidy = 20, #alleles = 4
        Assert.assertEquals(GenotypeLikelihoods.calculateNumLikelihoods(4, 20), 1771);
        Assert.assertEquals(GenotypeLikelihoods.numLikelihoods(4, 20), 1771);
    }

    @Test
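The renamed numLikelihoods counts genotypes as combinations with repetition, C(nAlleles + ploidy - 1, ploidy): for diploids that reduces to nAlleles*(nAlleles+1)/2, and for 4 alleles at ploidy 20 it gives C(23,20) = 1771, matching both assertions. A self-contained reference sketch (an independent implementation, not the GATK code):

    // Exact integer evaluation of C(nAlleles + ploidy - 1, ploidy); after each
    // step the running product r equals the binomial C(nAlleles - 1 + i, i).
    static long numLikelihoodsRef(final int nAlleles, final int ploidy) {
        long r = 1;
        for (int i = 1; i <= ploidy; i++)
            r = r * (nAlleles - 1 + i) / i;
        return r;
    }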
@@ -51,20 +51,28 @@ import java.util.*;
public class VariantContextTestProvider {
    final protected static Logger logger = Logger.getLogger(VariantContextTestProvider.class);

    final private static boolean ENABLE_GENOTYPE_TESTS = true;
    final private static boolean ENABLE_A_AND_G_TESTS = true;
    final private static boolean ENABLE_VARARRAY_TESTS = true;
    final private static boolean ENABLE_PLOIDY_TESTS = true;
    final private static boolean ENABLE_PL_TESTS = true;
    final private static boolean ENABLE_SYMBOLIC_ALLELE_TESTS = false;
    final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
    final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true;
    final private static List<Integer> TWENTY_INTS = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);

    private static VCFHeader syntheticHeader;
    final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
    private static VariantContext ROOT;

    private final static List<File> testSourceVCFs = Arrays.asList(
            new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"),
            new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf")
    );
    private final static List<File> testSourceVCFs = new ArrayList<File>();
    static {
        testSourceVCFs.add(new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
        testSourceVCFs.add(new File(BaseTest.privateTestDir + "ex2.vcf"));
        testSourceVCFs.add(new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf"));
        if ( ENABLE_SYMBOLIC_ALLELE_TESTS )
            testSourceVCFs.add(new File(BaseTest.privateTestDir + "diagnosis_targets_testfile.vcf"));
    }

    public abstract static class VariantContextIOTest {
        public String toString() {

@@ -148,7 +156,7 @@ public class VariantContextTestProvider {
        logger.warn("Reading records from " + file);
        for ( final VariantContext raw : x.getSecond() ) {
            if ( raw != null )
                fullyDecoded.add(raw.fullyDecode(x.getFirst()));
                fullyDecoded.add(raw.fullyDecode(x.getFirst(), false));
        }
        logger.warn("Done reading " + file);

@@ -179,6 +187,7 @@ public class VariantContextTestProvider {

        addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.String);
        addHeaderLine(metaData, "GQ", 1, VCFHeaderLineType.Integer);
        addHeaderLine(metaData, "ADA", VCFHeaderLineCount.A, VCFHeaderLineType.Integer);
        addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
        addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
        addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);

@@ -241,7 +250,7 @@ public class VariantContextTestProvider {
        add(builder().attribute("INT3", Arrays.asList(1000, 2000, 3000)));
        add(builder().attribute("INT3", Arrays.asList(100000, 200000, 300000)));
        add(builder().attribute("INT3", null));
        add(builder().attribute("INT20", Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
        add(builder().attribute("INT20", TWENTY_INTS));

        add(builder().attribute("FLOAT1", 1.0));
        add(builder().attribute("FLOAT1", 100.0));

@@ -267,9 +276,13 @@ public class VariantContextTestProvider {
        add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2", "s3")));
        add(builder().attribute("VAR.INFO.STRING", null));

        addGenotypesToTestData();
        if ( ENABLE_GENOTYPE_TESTS ) {
            addGenotypesToTestData();
            addComplexGenotypesTest();
        }

        addComplexGenotypesTest();
        if ( ENABLE_A_AND_G_TESTS )
            addGenotypesAndGTests();
    }

    private static void addGenotypesToTestData() {

@@ -314,7 +327,6 @@ public class VariantContextTestProvider {
        }
    }


    private static void addGenotypes( final VariantContext site) {
        // test ref/ref
        final Allele ref = site.getReference();

@@ -442,6 +454,11 @@ public class VariantContextTestProvider {
                attr("g1", ref, "INT3", 1, 2, 3),
                attr("g2", ref, "INT3"));

        addGenotypeTests(site,
                attr("g1", ref, "INT20", TWENTY_INTS),
                attr("g2", ref, "INT20", TWENTY_INTS));


        if (ENABLE_VARARRAY_TESTS) {
            addGenotypeTests(site,
                    attr("g1", ref, "INT.VAR", 1, 2, 3),

@@ -515,6 +532,46 @@ public class VariantContextTestProvider {
        }
    }

    private static void addGenotypesAndGTests() {
        // for ( final int ploidy : Arrays.asList(2)) {
        for ( final int ploidy : Arrays.asList(1, 2, 3, 4, 5)) {
            final List<List<String>> alleleCombinations =
                    Arrays.asList(
                            Arrays.asList("A"),
                            Arrays.asList("A", "C"),
                            Arrays.asList("A", "C", "G"),
                            Arrays.asList("A", "C", "G", "T"));

            for ( final List<String> alleles : alleleCombinations ) {
                final VariantContextBuilder vcb = builder().alleles(alleles);
                final VariantContext site = vcb.make();
                final int nAlleles = site.getNAlleles();
                final Allele ref = site.getReference();

                // base genotype is ref/.../ref up to ploidy
                final List<Allele> baseGenotype = new ArrayList<Allele>(ploidy);
                for ( int i = 0; i < ploidy; i++) baseGenotype.add(ref);
                final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy);

                // ada is 0, 1, ..., nAlleles - 1
                final List<Integer> ada = new ArrayList<Integer>(nAlleles);
                for ( int i = 0; i < nAlleles - 1; i++ ) ada.add(i);

                // pl is 0, 1, ..., up to nPLs (complex calc of nAlleles and ploidy)
                final int[] pl = new int[nPLs];
                for ( int i = 0; i < pl.length; i++ ) pl[i] = i;

                final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE");
                gb.alleles(baseGenotype);
                gb.PL(pl);
                gb.attribute("ADA", nAlleles == 2 ? ada.get(0) : ada);
                vcb.genotypes(gb.make());

                add(vcb);
            }
        }
    }

    private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
        if ( value.length == 0 )
            return GenotypeBuilder.create(name, Arrays.asList(ref, ref));

@@ -598,7 +655,7 @@ public class VariantContextTestProvider {
    public VariantContext next() {
        try {
            final VariantContext vc = codec.decode(pbs);
            return vc == null ? null : vc.fullyDecode(header);
            return vc == null ? null : vc.fullyDecode(header, false);
        } catch ( IOException e ) {
            throw new RuntimeException(e);
        }

@@ -646,20 +703,23 @@ public class VariantContextTestProvider {
     * @param expected
     */
    public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.getChr(), expected.getChr());
        Assert.assertEquals(actual.getStart(), expected.getStart());
        Assert.assertEquals(actual.getEnd(), expected.getEnd());
        Assert.assertEquals(actual.getID(), expected.getID());
        Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
        Assert.assertNotNull(actual, "VariantContext expected not null");
        Assert.assertEquals(actual.getChr(), expected.getChr(), "chr");
        Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
        Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
        Assert.assertEquals(actual.getID(), expected.getID(), "id");
        Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");

        assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
        Assert.assertEquals(actual.getFilters(), expected.getFilters());
        Assert.assertEquals(actual.getFilters(), expected.getFilters(), "filters");
        BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());

        Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes());
        Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
        if ( expected.hasGenotypes() ) {
            Assert.assertEquals(actual.getSampleNames(), expected.getSampleNames());
            final Set<String> actualSampleSet = new HashSet<String>(actual.getSampleNames());
            final Set<String> expectedSampleSet = new HashSet<String>(expected.getSampleNames());
            Assert.assertTrue(actualSampleSet.equals(expectedSampleSet), "sample names"); // note this is necessary due to testng bug for set comps
            Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");
            final Set<String> samples = expected.getSampleNames();
            for ( final String sample : samples ) {
                assertEquals(actual.getGenotype(sample), expected.getGenotype(sample));

@@ -668,33 +728,33 @@ public class VariantContextTestProvider {
    }

    public static void assertEquals(final Genotype actual, final Genotype expected) {
        Assert.assertEquals(actual.getSampleName(), expected.getSampleName());
        Assert.assertEquals(actual.getAlleles(), expected.getAlleles());
        Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString());
        Assert.assertEquals(actual.getType(), expected.getType());
        Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names");
        Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "Genotype alleles");
        Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString(), "Genotype string");
        Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type");

        // filters are the same
        Assert.assertEquals(actual.getFilters(), expected.getFilters());
        Assert.assertEquals(actual.isFiltered(), expected.isFiltered());
        Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields");
        Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered");

        // inline attributes
        Assert.assertEquals(actual.getDP(), expected.getDP());
        Assert.assertEquals(actual.getAD(), expected.getAD());
        Assert.assertEquals(actual.getGQ(), expected.getGQ());
        Assert.assertEquals(actual.hasPL(), expected.hasPL());
        Assert.assertEquals(actual.hasAD(), expected.hasAD());
        Assert.assertEquals(actual.hasGQ(), expected.hasGQ());
        Assert.assertEquals(actual.hasDP(), expected.hasDP());
        Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp");
        Assert.assertEquals(actual.getAD(), expected.getAD(), "Genotype ad");
        Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq");
        Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL");
        Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD");
        Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ");
        Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP");

        Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods());
        Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString());
        Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods());
        Assert.assertEquals(actual.getPL(), expected.getPL());
        Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods");
        Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString");
        Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods");
        Assert.assertEquals(actual.getPL(), expected.getPL(), "Genotype getPL");

        Assert.assertEquals(actual.getPhredScaledQual(), expected.getPhredScaledQual());
        Assert.assertEquals(actual.getPhredScaledQual(), expected.getPhredScaledQual(), "Genotype phredScaledQual");
        assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes());
        Assert.assertEquals(actual.isPhased(), expected.isPhased());
        Assert.assertEquals(actual.getPloidy(), expected.getPloidy());
        Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased");
        Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy");
    }

    private static void assertAttributesEquals(final Map<String, Object> actual, Map<String, Object> expected) {

@@ -706,16 +766,16 @@ public class VariantContextTestProvider {
            final Object expectedValue = expected.get(act.getKey());
            if ( expectedValue instanceof List ) {
                final List<Object> expectedList = (List<Object>)expectedValue;
                Assert.assertTrue(actualValue instanceof List);
                Assert.assertTrue(actualValue instanceof List, act.getKey() + " should be a list but isn't");
                final List<Object> actualList = (List<Object>)actualValue;
                Assert.assertEquals(actualList.size(), expectedList.size());
                Assert.assertEquals(actualList.size(), expectedList.size(), act.getKey() + " size");
                for ( int i = 0; i < expectedList.size(); i++ )
                    assertAttributesEquals(actualList.get(i), expectedList.get(i));
                    assertAttributeEquals(act.getKey(), actualList.get(i), expectedList.get(i));
            } else
                assertAttributesEquals(actualValue, expectedValue);
                assertAttributeEquals(act.getKey(), actualValue, expectedValue);
            } else {
                // it's ok to have a binding in x -> null that's absent in y
                Assert.assertNull(actualValue);
                Assert.assertNull(actualValue, act.getKey() + " present in one but not in the other");
            }
            expectedKeys.remove(act.getKey());
        }

@@ -724,7 +784,7 @@ public class VariantContextTestProvider {
        // and they must all be null
        for ( final String missingExpected : expectedKeys ) {
            final Object value = expected.get(missingExpected);
            Assert.assertTrue(isMissing(value));
            Assert.assertTrue(isMissing(value), "Attribute " + missingExpected + " missing in one but not in other" );
        }
    }

@@ -741,12 +801,12 @@ public class VariantContextTestProvider {
        return false;
    }

    private static void assertAttributesEquals(final Object actual, final Object expected) {
    private static void assertAttributeEquals(final String key, final Object actual, final Object expected) {
        if ( expected instanceof Double ) {
            // must be very tolerant because doubles are being rounded to 2 sig figs
            BaseTest.assertEqualsDoubleSmart(actual, (Double)expected, 1e-2);
        } else
            Assert.assertEquals(actual, expected);
            Assert.assertEquals(actual, expected, "Attribute " + key);
    }

    public static void addComplexGenotypesTest() {

@@ -816,14 +876,14 @@ public class VariantContextTestProvider {
    }

    public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {
        Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size());
        Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size(), "No VCF header lines");

        // for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted?
        //Assert.assertEquals(actual.getMetaData(), expected.getMetaData());
        final List<VCFHeaderLine> actualLines = new ArrayList<VCFHeaderLine>(actual.getMetaData());
        final List<VCFHeaderLine> expectedLines = new ArrayList<VCFHeaderLine>(expected.getMetaData());
        for ( int i = 0; i < actualLines.size(); i++ ) {
            Assert.assertEquals(actualLines.get(i), expectedLines.get(i));
            Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines");
        }
    }
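The assertion changes above all apply one pattern: TestNG's three-argument assertEquals(actual, expected, message) overload, so a failing comparison names the field that diverged instead of printing two bare values. For example:

    // Unlabeled: a failure prints only the mismatched values.
    Assert.assertEquals(actual.getStart(), expected.getStart());
    // Labeled: the failure message identifies the field under comparison.
    Assert.assertEquals(actual.getStart(), expected.getStart(), "start");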
@@ -121,7 +121,7 @@ public class VariantContextWritersUnitTest extends BaseTest {
        final List<VariantContext> fullyDecoded = new ArrayList<VariantContext>(vcsAfterIO.size());

        for ( final VariantContext withStrings : vcsAfterIO )
            fullyDecoded.add(withStrings.fullyDecode(header));
            fullyDecoded.add(withStrings.fullyDecode(header, false));

        return fullyDecoded;
    }
@@ -24,7 +24,6 @@

package org.broadinstitute.sting.queue.engine

import java.util.Date
import java.text.SimpleDateFormat

/**

@@ -36,18 +35,21 @@ class JobRunInfo {
  val formatter = new SimpleDateFormat("yy-MM-dd H:mm:ss:SSS");

  /** The start time with millisecond resolution of this job */
  var startTime: Date = _
  var startTime: java.util.Date = _
  /** The done time with millisecond resolution of this job */
  var doneTime: Date = _
  var doneTime: java.util.Date = _
  var exechosts: String = "localhost"

  def getStartTime = startTime
  def getDoneTime = doneTime
  def getFormattedStartTime = formatTime(getStartTime)
  def getFormattedDoneTime = formatTime(getDoneTime)
  def getStartTime: String = getTime(startTime)
  def getDoneTime: String = getTime(doneTime)
  def getFormattedStartTime = formatTime(startTime)
  def getFormattedDoneTime = formatTime(doneTime)

  /** Helper function that returns the time of the date */
  private def getTime(d: java.util.Date): String = if ( d != null ) d.getTime.toString else "null"

  /** Helper function that pretty prints the date */
  private def formatTime(d: Date) = if ( d != null ) formatter.format(d) else "null"
  private def formatTime(d: java.util.Date): String = if ( d != null ) formatter.format(d) else "null"

  def getExecHosts = exechosts

@@ -55,14 +57,14 @@ class JobRunInfo {
   * Was any information set for this jobInfo? JobInfo can be unset because
   * the job never ran or because it already completed.
   */
  def isFilledIn = startTime != null
  def isFilledIn = startTime != null && doneTime != null

  /**
   * How long did the job run (in wall time)? Returns -1 if this jobInfo isn't filled in
   */
  def getRuntimeInMs: Long = {
    if ( isFilledIn )
      getDoneTime.getTime - getStartTime.getTime
      doneTime.getTime - startTime.getTime
    else
      -1
  }
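isFilledIn now requires both timestamps, which keeps getRuntimeInMs from dereferencing a null doneTime for jobs that started but never finished. The same guard restated as a stand-alone Java sketch (the Scala above is the actual change):

    // Wall-clock runtime in milliseconds, or -1 until both timestamps exist.
    static long runtimeInMs(final java.util.Date start, final java.util.Date done) {
        return (start != null && done != null) ? done.getTime() - start.getTime() : -1L;
    }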
@@ -35,7 +35,7 @@ import org.ggf.drmaa.Session
class GridEngineJobRunner(session: Session, function: CommandLineFunction) extends DrmaaJobRunner(session, function) with Logging {
  // Grid Engine disallows certain characters from being in job names.
  // This replaces all illegal characters with underscores
  protected override val jobNameFilter = """[\n\t\r/:@\\*?]"""
  protected override val jobNameFilter = """[\n\t\r/:,@\\*?]"""
  protected override val minRunnerPriority = -1023
  protected override val maxRunnerPriority = 0
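The only change here adds ',' to the character class scrubbed from Grid Engine job names. As a plain-Java sketch of the effect (the actual substitution happens in DrmaaJobRunner, which is not shown in this diff, so treat the replacement call as an assumption):

    // Replace every character Grid Engine rejects (now including ',') with '_'.
    String safeName = jobName.replaceAll("[\\n\\t\\r/:,@\\\\*?]", "_");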
@@ -38,11 +38,11 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {
  override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount

  protected override def maxIntervals = {
    GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).contigs.size
    GATKScatterFunction.getGATKIntervals(this.originalGATK).contigs.size
  }

  def run() {
    val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
    val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles)
  }
}
@@ -26,13 +26,23 @@ package org.broadinstitute.sting.queue.extensions.gatk

import java.io.File
import collection.JavaConversions._
import org.broadinstitute.sting.utils.interval.{IntervalMergingRule, IntervalUtils}
import org.broadinstitute.sting.utils.interval.{IntervalSetRule, IntervalMergingRule, IntervalUtils}
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
import net.sf.samtools.SAMFileHeader
import java.util.Collections
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
import org.broadinstitute.sting.utils.GenomeLoc
import org.broadinstitute.sting.commandline._
import org.broad.tribble.Feature

case class GATKIntervals(reference: File, intervals: Seq[File], intervalsString: Seq[String],
                         intervalSetRule: IntervalSetRule, intervalMergingRule: IntervalMergingRule, intervalPadding: Option[Int],
                         excludeIntervals: Seq[File], excludeIntervalsString: Seq[String]) {

  def this(gatk: CommandLineGATK) = this(
    gatk.reference_sequence,
    gatk.intervals, gatk.intervalsString,
    gatk.interval_set_rule, gatk.interval_merging, gatk.interval_padding,
    gatk.excludeIntervals, gatk.excludeIntervalsString)

case class GATKIntervals(reference: File, intervals: Seq[String]) {
  private lazy val referenceDataSource = new ReferenceDataSource(reference)

  lazy val samFileHeader = {

@@ -42,16 +52,46 @@ case class GATKIntervals(reference: File, intervals: Seq[String]) {
  }

  lazy val locs: java.util.List[GenomeLoc] = {
    val parser = new GenomeLocParser(referenceDataSource.getReference)
    val parsedLocs =
      if (intervals.isEmpty)
        GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList
      else
        IntervalUtils.parseIntervalArguments(parser, intervals)
    Collections.sort(parsedLocs)
    val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY)
    Collections.unmodifiableList(mergedLocs)
    val includeIntervalBindings = this.intervals.map(GATKIntervals.createBinding(_, "intervals")) ++
      this.intervalsString.map(GATKIntervals.createBinding(_, "intervalsString"))
    val excludeIntervalBindings = this.excludeIntervals.map(GATKIntervals.createBinding(_, "excludeIntervals")) ++
      this.excludeIntervalsString.map(GATKIntervals.createBinding(_, "excludeIntervalsString"))

    IntervalUtils.parseIntervalBindings(
      referenceDataSource,
      includeIntervalBindings,
      intervalSetRule, intervalMergingRule, intervalPadding.getOrElse(0),
      excludeIntervalBindings).toList
  }

  lazy val contigs = locs.map(_.getContig).distinct.toSeq
}

object GATKIntervals {
  def copyIntervalArguments(src: CommandLineGATK, dst: CommandLineGATK) {
    dst.reference_sequence = src.reference_sequence
    dst.intervals = src.intervals
    dst.intervalsString = src.intervalsString
    dst.interval_set_rule = src.interval_set_rule
    dst.interval_merging = src.interval_merging
    dst.interval_padding = src.interval_padding
    dst.excludeIntervals = src.excludeIntervals
    dst.excludeIntervalsString = src.excludeIntervalsString
  }

  private def createBinding(interval: File, argumentName: String): IntervalBinding[Feature] = {
    val tags = interval match {
      case taggedFile: TaggedFile => ParsingMethod.parseTags(argumentName, taggedFile.tag)
      case file: File => new Tags
    }
    createBinding(interval.getAbsolutePath, argumentName, tags)
  }

  private def createBinding(interval: String, argumentName: String): IntervalBinding[Feature] = {
    createBinding(interval, argumentName, new Tags)
  }

  private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = {
    ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]]
  }
}
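GATKIntervals now captures the complete interval argument set of a CommandLineGATK and resolves it through IntervalBinding objects, so Queue parses -L/-XL inputs with the same machinery the engine uses. What the Scala createBinding helpers reduce to, sketched in Java (parseBinding and its parameters appear in this commit; the interval literal is an arbitrary example):

    // Build an IntervalBinding for a raw interval string with no tags.
    @SuppressWarnings("unchecked")
    final IntervalBinding<Feature> binding = (IntervalBinding<Feature>)
            ArgumentTypeDescriptor.parseBinding("chr1:1-100", Feature.class,
                    IntervalBinding.class, "intervals", new Tags(), "intervals");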
@@ -28,14 +28,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils
import java.io.File
import org.broadinstitute.sting.utils.io.IOUtils
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterFunction}
import org.broadinstitute.sting.commandline.Output
import org.broadinstitute.sting.commandline._

trait GATKScatterFunction extends ScatterFunction {
  /** The runtime field to set for specifying an interval file. */
  /* The runtime field to set for specifying intervals. */
  private final val intervalsField = "intervals"

  /** The runtime field to set for specifying an interval string. */
  private final val intervalsStringField = "intervalsString"
  private final val excludeIntervalsField = "excludeIntervals"
  private final val excludeIntervalsStringField = "excludeIntervalsString"
  private final val intervalsSetRuleField = "interval_set_rule"
  private final val intervalMergingField = "interval_merging"
  private final val intervalPaddingField = "interval_padding"

  @Output(doc="Scatter function outputs")
  var scatterOutputFiles: Seq[File] = Nil

@@ -43,25 +46,14 @@ trait GATKScatterFunction extends ScatterFunction {
  /** The original GATK function. */
  protected var originalGATK: CommandLineGATK = _

  /** The reference sequence for the GATK function. */
  protected var referenceSequence: File = _

  /** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
  protected var intervals: Seq[String] = Nil

  /** Whether the last scatter job should also include any unmapped reads. */
  protected var includeUnmapped: Boolean = _

  override def init() {
    this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
    this.referenceSequence = this.originalGATK.reference_sequence
    if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
      this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, Seq.empty[String]).contigs
    } else {
      this.intervals ++= this.originalGATK.intervals.map(_.toString)
      this.intervals ++= this.originalGATK.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
    // If intervals have been specified check if unmapped is included
    if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0)
      this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
    }
  }

  override def isScatterGatherable = {

@@ -74,6 +66,12 @@ trait GATKScatterFunction extends ScatterFunction {
      cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped"))
    else
      cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String])

    cloneFunction.setFieldValue(this.intervalsSetRuleField, null)
    cloneFunction.setFieldValue(this.intervalMergingField, null)
    cloneFunction.setFieldValue(this.intervalPaddingField, None)
    cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File])
    cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String])
  }

  override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {

@@ -85,29 +83,28 @@ trait GATKScatterFunction extends ScatterFunction {
  }

  /**
   * Returns true if all interval files exist.
   * @return true if all interval files exist.
   */
  protected def intervalFilesExist = {
    !this.intervals.exists(interval => IntervalUtils.isIntervalFile(interval, false) && !new File(interval).exists)
    !(this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).exists(interval => !interval.exists())
  }

  /**
   * Returns the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
   * @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
   */
  protected def maxIntervals: Int
}

object GATKScatterFunction {
  var gatkIntervals = Seq.empty[GATKIntervals]
  var gatkIntervalsCache = Seq.empty[GATKIntervals]

  def getGATKIntervals(reference: File, intervals: Seq[String]) = {
    gatkIntervals.find(gi => gi.reference == reference && gi.intervals == intervals) match {
      case Some(gi) => gi
  def getGATKIntervals(originalFunction: CommandLineGATK) = {
    val gatkIntervals = new GATKIntervals(originalFunction)
    gatkIntervalsCache.find(_ == gatkIntervals) match {
      case Some(existingGatkIntervals) => existingGatkIntervals
      case None =>
        val gi = new GATKIntervals(reference, intervals)
        gatkIntervals :+= gi
        gi
        gatkIntervalsCache :+= gatkIntervals
        gatkIntervals
    }
  }
}
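The cache rewrite keys on value equality of the GATKIntervals case class rather than on a (reference, intervals) pair, so any difference in set rule, merging rule, padding, or exclusions yields a distinct cache entry. The find-or-insert pattern in Java (cache and candidate are hypothetical stand-ins; Scala case-class equality plays the role of equals()):

    // Return the cached instance if an equal one exists, otherwise remember the new one.
    for (final GATKIntervals cached : cache)
        if (cached.equals(candidate))
            return cached;
    cache.add(candidate);
    return candidate;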
@@ -33,12 +33,12 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
 */
class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction {
  protected override def maxIntervals =
    GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).locs.size
    GATKScatterFunction.getGATKIntervals(this.originalGATK).locs.size

  override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount

  def run() {
    val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
    val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size)
    IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
  }
@@ -31,13 +31,11 @@ import org.broadinstitute.sting.queue.function.InProcessFunction
/**
 * A scatter function that divides down to the locus level.
 */
//class LocusScatterFunction extends IntervalScatterFunction { }

class LocusScatterFunction extends GATKScatterFunction with InProcessFunction {
  protected override def maxIntervals = scatterCount

  def run() {
    val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
    val gi = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size)
    IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
  }
@@ -37,14 +37,11 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {

  private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]

  override def freezeFieldValues {
  override def freezeFieldValues() {
    this.jarFile = this.originalGATK.jarFile
    this.reference_sequence = this.originalGATK.reference_sequence
    this.intervals = this.originalGATK.intervals
    this.intervalsString = this.originalGATK.intervalsString

    this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
    this.out = this.originalOutput
    GATKIntervals.copyIntervalArguments(this.originalGATK, this)

    // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
    // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK

@@ -55,6 +52,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
    val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
    this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]

    super.freezeFieldValues
    super.freezeFieldValues()
  }
}
@@ -49,6 +49,11 @@ trait JavaCommandLineFunction extends CommandLineFunction {
   */
  var javaMemoryLimit: Option[Double] = None

  /**
   * Max number of GC threads
   */
  var javaGCThreads: Option[Int] = None

  override def freezeFieldValues() {
    super.freezeFieldValues()

@@ -73,6 +78,8 @@ trait JavaCommandLineFunction extends CommandLineFunction {
  }

  def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) +
                 conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") +
                 optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) +
                 required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)

  def commandLine = required("java") +
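With the new javaGCThreads option, javaOpts emits parallel-GC flags alongside the existing memory and tmpdir settings. A sketch of the string the options above would assemble, assuming javaMemoryLimit = 4.0 (GB) and javaGCThreads = 4 (jobTempDir is a stand-in):

    // Expected shape: -Xmx4096m -XX:+UseParallelOldGC -XX:ParallelGCThreads=4 -Djava.io.tmpdir=...
    final int mb = (int) Math.ceil(4.0 * 1024);
    final String opts = "-Xmx" + mb + "m"
            + " -XX:+UseParallelOldGC"
            + " -XX:ParallelGCThreads=" + 4
            + " -Djava.io.tmpdir=" + jobTempDir;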
@@ -56,8 +56,8 @@ trait QJobReport extends Logging {
      "jobName" -> QJobReport.workAroundSameJobNames(this),
      "intermediate" -> self.isIntermediate,
      "exechosts" -> info.getExecHosts,
      "startTime" -> info.getStartTime.getTime,
      "doneTime" -> info.getDoneTime.getTime,
      "startTime" -> info.getStartTime,
      "doneTime" -> info.getDoneTime,
      "formattedStartTime" -> info.getFormattedStartTime,
      "formattedDoneTime" -> info.getFormattedDoneTime,
      "runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null")
@@ -70,7 +70,7 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
   * @return The parsed object.
   */
  def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = {
    parse(parsingEngine,source, makeRawTypeIfNecessary(typeType), argumentMatches)
    parse(parsingEngine,source, ArgumentTypeDescriptor.makeRawTypeIfNecessary(typeType), argumentMatches)
  }

  def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
@@ -26,19 +26,21 @@ package org.broadinstitute.sting.queue.extensions.gatk

import java.io.File
import org.testng.Assert
import org.testng.annotations.Test
import org.testng.annotations.{DataProvider, Test}
import org.broadinstitute.sting.BaseTest
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile
import org.broadinstitute.sting.utils.{GenomeLocSortedSet, GenomeLocParser}
import collection.JavaConversions._
import org.broadinstitute.sting.utils.interval.IntervalUtils
import org.broadinstitute.sting.utils.exceptions.UserException

class GATKIntervalsUnitTest {
  private final lazy val hg18Reference = new File(BaseTest.hg18Reference)
  private final lazy val hg18GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg18Reference))
  private final lazy val hg18ReferenceLocs = GenomeLocSortedSet.
    createSetFromSequenceDictionary(new ReferenceDataSource(hg18Reference).getReference.getSequenceDictionary).toList
  private final lazy val hg19GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg19Reference))

  private final lazy val hg19Reference = new File(BaseTest.hg19Reference)

@@ -48,14 +50,14 @@ class GATKIntervalsUnitTest {
    val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3")
    val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5")

    val gi = new GATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
    val gi = createGATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
    Assert.assertEquals(gi.locs.toSeq, Seq(chr1, chr2, chr3))
    Assert.assertEquals(gi.contigs, Seq("chr1", "chr2", "chr3"))
  }

  @Test(timeOut = 30000L)
  def testIntervalFile() {
    var gi = new GATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
    val gi = createGATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
    Assert.assertEquals(gi.locs.size, 189894)
    // Timeout check is because of bad:
    // for(Item item: javaConvertedScalaList)

@@ -67,28 +69,85 @@ class GATKIntervalsUnitTest {

  @Test
  def testEmptyIntervals() {
    val gi = new GATKIntervals(hg18Reference, Nil)
    val gi = createGATKIntervals(hg18Reference, Nil)
    Assert.assertEquals(gi.locs, hg18ReferenceLocs)
    Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size)
  }

  @Test
  def testContigCounts() {
    Assert.assertEquals(new GATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
    Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
    Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
    Assert.assertEquals(createGATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
    Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
    Assert.assertEquals(createGATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
  }

  @Test
  def testSortAndMergeIntervals() {
    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10"))
    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12"))
    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"))
    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-20", "chr1:21-30"))
    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:21-30", "chr1:10-20"), Seq("chr1:1-20", "chr1:21-30"))
  @DataProvider(name="sortAndMergeIntervals")
  def getSortAndMergeIntervals: Array[Array[AnyRef]] = {
    Array(
      Array(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10")),
      Array(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12")),
      Array(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-30")),
      Array(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-30")),
      Array(Seq("chr1:1-9", "chr1:21-30", "chr1:11-20"), Seq("chr1:1-9", "chr1:11-30"))
    ).asInstanceOf[Array[Array[AnyRef]]]
  }

  private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) {
    Assert.assertEquals(new GATKIntervals(hg18Reference, actual).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
  @Test(dataProvider="sortAndMergeIntervals")
  def testSortAndMergeIntervals(unmerged: Seq[String], expected: Seq[String]) {
    Assert.assertEquals(createGATKIntervals(hg18Reference, unmerged).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
  }

  @DataProvider(name="taggedFiles")
  def getTaggedFiles: Array[Array[AnyRef]] = {
    Array(
      Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", null, Seq("chr1:1-10")),
      Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "", Seq("chr1:1-10")),
      Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "myList", Seq("chr1:1-10")),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", null, Seq("1:897475-897481", "1:10001292")),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "", Seq("1:897475-897481", "1:10001292")),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf", Seq("1:897475-897481", "1:10001292")),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF", Seq("1:897475-897481", "1:10001292")),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVcf,VCF", Seq("1:897475-897481", "1:10001292")),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", null, Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000")),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED", Seq("20:1-999", "20:1002-2000", "22:1001-6000"))
    )
  }

  @Test(dataProvider="taggedFiles")
  def testTaggedFiles(reference: File, file: String, tags: String, expected: Seq[String]) {
    val gatk = new CommandLineGATK
    gatk.reference_sequence = reference
    gatk.intervals = Seq(new TaggedFile(file, tags))
    val parser = if (reference == hg18Reference) hg18GenomeLocParser else hg19GenomeLocParser
    Assert.assertEquals(new GATKIntervals(gatk).locs.toSeq, expected.map(parser.parseGenomeLoc(_)))
  }

  @DataProvider(name="badTaggedFiles")
  def getBadTaggedFiles: Array[Array[AnyRef]] = {
    Array(
      Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "VCF"),
      Array(hg18Reference, BaseTest.privateTestDir + "small_unmerged_gatk_intervals.list", "too,many,tags"),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "BED"),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "VCF,myVCF"),
      Array(hg19Reference, BaseTest.privateTestDir + "small.indel.test.vcf", "myVCF,VCF,extra"),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "VCF"),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "BED,myBed"),
      Array(hg19Reference, BaseTest.privateTestDir + "sampleBedFile.bed", "myBed,BED,extra")
    ).asInstanceOf[Array[Array[AnyRef]]]
  }

  @Test(dataProvider = "badTaggedFiles", expectedExceptions = Array(classOf[UserException]))
  def testBadTaggedFiles(reference: File, file: String, tags: String) {
    testTaggedFiles(reference, file, tags, Nil)
  }

  private def createGATKIntervals(reference: File, intervals: Seq[String]) = {
    val gatk = new CommandLineGATK
    gatk.reference_sequence = reference
    gatk.intervalsString = intervals
    new GATKIntervals(gatk)
  }
}
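The new data providers exercise the tag grammar for interval inputs: a bare file infers its Tribble type from the extension, a single tag supplies a display name, a name,type pair pins the type, and a type that contradicts the file (or a surplus tag) raises a UserException. The same inputs could be fed through IntervalBinding directly, as the Java unit tests earlier in this diff do (the path below is an illustrative stand-in):

    // Type inferred from the .vcf extension; name/type tags are layered on via
    // TaggedFile in the Scala tests above.
    final IntervalBinding<Feature> vcfIntervals =
            new IntervalBinding<Feature>("/path/to/small.indel.test.vcf");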
@@ -39,7 +39,6 @@ class DataProcessingPipelineTest {
      " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
      " -i " + BaseTest.publicTestDir + "exampleBAM.bam",
      " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
      " -nv ",
      " -test ",
      " -p " + projectName).mkString
    spec.fileMD5s += testOut -> "0de95b5642e41e11ecd6fa1770242b88"

@@ -57,7 +56,6 @@ class DataProcessingPipelineTest {
      " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta",
      " -i " + BaseTest.publicTestDir + "exampleBAM.bam",
      " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
      " -nv ",
      " -test ",
      " -bwa /home/unix/carneiro/bin/bwa",
      " -bwape ",
@@ -42,4 +42,43 @@ class ExampleUnifiedGenotyperPipelineTest {
    spec.jobRunners = PipelineTest.allJobRunners
    PipelineTest.executeTest(spec)
  }

  @Test
  def testUnifiedGenotyperWithGatkIntervals() {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_with_gatk_intervals"
    spec.args = Array(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
      " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
      " -R " + BaseTest.hg18Reference,
      " -L " + BaseTest.validationDataLocation + "intervalTest.intervals").mkString
    spec.jobRunners = Seq("Lsf706")
    PipelineTest.executeTest(spec)
  }

  @Test
  def testUnifiedGenotyperWithBedIntervals() {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_with_bed_intervals"
    spec.args = Array(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
      " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
      " -R " + BaseTest.hg18Reference,
      " -L " + BaseTest.validationDataLocation + "intervalTest.bed").mkString
    spec.jobRunners = Seq("Lsf706")
    PipelineTest.executeTest(spec)
  }

  @Test
  def testUnifiedGenotyperWithVcfIntervals() {
    val spec = new PipelineTestSpec
    spec.name = "unifiedgenotyper_with_vcf_intervals"
    spec.args = Array(
      " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
      " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam",
      " -R " + BaseTest.hg18Reference,
      " -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf").mkString
    spec.jobRunners = Seq("Lsf706")
    PipelineTest.executeTest(spec)
  }
}
@@ -1,3 +0,0 @@
<ivy-module version="1.0">
    <info organisation="org.broad" module="tribble" revision="107" status="integration" />
</ivy-module>
Binary file not shown.
Some files were not shown because too many files have changed in this diff.