Sending latest bug fixes to Reduce Reads to the main repository
This commit is contained in:
commit
96768c8a18
|
|
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String OUTPUT_DIR = "analyzeCovariates/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
|
|
|
|||
|
|
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
||||
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||
public String familyStr = null;
|
||||
|
||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
private Collection<VariantContext> indelBufferContext;
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
|
||||
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private String RSCRIPT_FILE = null;
|
||||
|
|
|
|||
|
|
@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||
}
|
||||
else
|
||||
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
}
|
||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
|
|
@ -32,6 +34,9 @@ public class MendelianViolation {
|
|||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
|
|
@ -168,4 +173,41 @@ public class MendelianViolation {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
// |- AA
|
||||
// AA AA | AB
|
||||
// |- BB
|
||||
// |- AA
|
||||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||
}
|
||||
}
|
||||
}
|
||||
double[] mvLiks = new double[12];
|
||||
double[] nonMVLiks = new double[15];
|
||||
for ( int i = 0; i < 12; i ++ ) {
|
||||
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||
}
|
||||
|
||||
for ( int i = 0; i < 15; i++) {
|
||||
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||
}
|
||||
|
||||
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public class RScriptExecutor {
|
|||
|
||||
public static class RScriptArgumentCollection {
|
||||
@Advanced
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||
public String PATH_TO_RSCRIPT = "Rscript";
|
||||
|
||||
@Advanced
|
||||
|
|
|
|||
|
|
@ -240,22 +240,34 @@ public class Utils {
|
|||
return ret.toString();
|
||||
}
|
||||
|
||||
//public static String join(String separator, Collection<String> strings) {
|
||||
// return join( separator, strings.toArray(new String[0]) );
|
||||
//}
|
||||
|
||||
public static <T> String join(String separator, Collection<T> objects) {
|
||||
if(objects.isEmpty()) {
|
||||
/**
|
||||
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||
* "" if collection is empty. If collection contains just elt, then returns elt.toString()
|
||||
*
|
||||
* @param separator the string to use to separate objects
|
||||
* @param objects a collection of objects. the element order is defined by the iterator over objects
|
||||
* @param <T> the type of the objects
|
||||
* @return a non-null string
|
||||
*/
|
||||
public static <T> String join(final String separator, final Collection<T> objects) {
|
||||
if (objects.isEmpty()) { // fast path for empty collection
|
||||
return "";
|
||||
}
|
||||
Iterator<T> iter = objects.iterator();
|
||||
final StringBuilder ret = new StringBuilder(iter.next().toString());
|
||||
while(iter.hasNext()) {
|
||||
ret.append(separator);
|
||||
ret.append(iter.next().toString());
|
||||
}
|
||||
} else {
|
||||
final Iterator<T> iter = objects.iterator();
|
||||
final T first = iter.next();
|
||||
|
||||
return ret.toString();
|
||||
if ( ! iter.hasNext() ) // fast path for singleton collections
|
||||
return first.toString();
|
||||
else { // full path for 2+ collection that actually need a join
|
||||
final StringBuilder ret = new StringBuilder(first.toString());
|
||||
while(iter.hasNext()) {
|
||||
ret.append(separator);
|
||||
ret.append(iter.next().toString());
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String dupString(char c, int nCopies) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -131,19 +132,18 @@ public class BAQ {
|
|||
private final static double EM = 0.33333333333;
|
||||
private final static double EI = 0.25;
|
||||
|
||||
private double[][][] EPSILONS = new double[256][256][64];
|
||||
private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1];
|
||||
|
||||
private void initializeCachedData() {
|
||||
for ( int i = 0; i < 256; i++ )
|
||||
for ( int j = 0; j < 256; j++ )
|
||||
for ( int q = 0; q < 64; q++ ) {
|
||||
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
|
||||
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
|
||||
EPSILONS[i][j][q] = 1.0;
|
||||
}
|
||||
|
||||
for ( char b1 : "ACGTacgt".toCharArray() ) {
|
||||
for ( char b2 : "ACGTacgt".toCharArray() ) {
|
||||
for ( int q = 0; q < 64; q++ ) {
|
||||
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
|
||||
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
|
||||
double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
|
||||
EPSILONS[(byte)b1][(byte)b2][q] = e;
|
||||
|
|
@ -152,7 +152,7 @@ public class BAQ {
|
|||
}
|
||||
}
|
||||
|
||||
private double calcEpsilon( byte ref, byte read, byte qualB ) {
|
||||
protected double calcEpsilon( byte ref, byte read, byte qualB ) {
|
||||
return EPSILONS[ref][read][qualB];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
|
|||
import org.broad.tribble.NameAwareCodec;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.util.BlockCompressedInputStream;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -215,7 +216,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||
|
||||
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
||||
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||
" tokens, and saw " + nParts + " )", lineNo);
|
||||
|
|
@ -345,6 +346,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
generateException("The VCF specification requires a valid info field");
|
||||
|
||||
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
|
||||
if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
|
||||
generateException("The VCF specification does not allow for whitespace in the INFO field");
|
||||
|
||||
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
|
||||
for (int i = 0; i < infoValueSplitSize; i++) {
|
||||
String key;
|
||||
|
|
@ -587,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
|
||||
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -598,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
|
||||
stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
||||
String firstLine = new String(buff);
|
||||
stream.close();
|
||||
return firstLine.startsWith(MAGIC_HEADER_LINE);
|
||||
int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
||||
boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
|
||||
return eq;
|
||||
// String firstLine = new String(buff);
|
||||
// return firstLine.startsWith(MAGIC_HEADER_LINE);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
} catch ( RuntimeException e ) {
|
||||
return false;
|
||||
} finally {
|
||||
try { stream.close(); } catch ( IOException e ) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -316,19 +316,22 @@ public class VariantContextUtils {
|
|||
return pruneVariantContext(vc, null);
|
||||
}
|
||||
|
||||
public static VariantContext pruneVariantContext(VariantContext vc, Collection<String> keysToPreserve ) {
|
||||
MutableVariantContext mvc = new MutableVariantContext(vc);
|
||||
public static VariantContext pruneVariantContext(final VariantContext vc, final Collection<String> keysToPreserve ) {
|
||||
final MutableVariantContext mvc = new MutableVariantContext(vc);
|
||||
|
||||
if ( keysToPreserve == null || keysToPreserve.size() == 0 )
|
||||
mvc.clearAttributes();
|
||||
else {
|
||||
Map<String, Object> d = mvc.getAttributes();
|
||||
final Map<String, Object> d = mvc.getAttributes();
|
||||
mvc.clearAttributes();
|
||||
for ( String key : keysToPreserve )
|
||||
if ( d.containsKey(key) )
|
||||
mvc.putAttribute(key, d.get(key));
|
||||
}
|
||||
|
||||
// this must be done as the ID is stored in the attributes field
|
||||
if ( vc.hasID() ) mvc.setID(vc.getID());
|
||||
|
||||
Collection<Genotype> gs = mvc.getGenotypes().values();
|
||||
mvc.clearGenotypes();
|
||||
for ( Genotype g : gs ) {
|
||||
|
|
@ -443,34 +446,6 @@ public class VariantContextUtils {
|
|||
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name
|
||||
*
|
||||
* @param genomeLocParser loc parser
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param inputRefBase the ref base
|
||||
* @return new VariantContext
|
||||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
|
|
@ -486,12 +461,18 @@ public class VariantContextUtils {
|
|||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @return new VariantContext
|
||||
* @return new VariantContext representing the merge of unsortedVCs
|
||||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, String setKey,
|
||||
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
|
||||
public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
|
||||
final Collection<VariantContext> unsortedVCs,
|
||||
final List<String> priorityListOfVCs,
|
||||
final FilteredRecordMergeType filteredRecordMergeType,
|
||||
final GenotypeMergeType genotypeMergeOptions,
|
||||
final boolean annotateOrigin,
|
||||
final boolean printMessages,
|
||||
final String setKey,
|
||||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -514,26 +495,28 @@ public class VariantContextUtils {
|
|||
return null;
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
VariantContext first = VCs.get(0);
|
||||
String name = first.getSource();
|
||||
GenomeLoc loc = getLocation(genomeLocParser,first);
|
||||
final VariantContext first = VCs.get(0);
|
||||
final String name = first.getSource();
|
||||
final Allele refAllele = determineReferenceAllele(VCs);
|
||||
|
||||
Set<Allele> alleles = new TreeSet<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
double negLog10PError = -1;
|
||||
Set<String> filters = new TreeSet<String>();
|
||||
Map<String, Object> attributes = new TreeMap<String, Object>();
|
||||
Set<String> inconsistentAttributes = new HashSet<String>();
|
||||
String rsID = null;
|
||||
final Set<Allele> alleles = new TreeSet<Allele>();
|
||||
final Set<String> filters = new TreeSet<String>();
|
||||
final Map<String, Object> attributes = new TreeMap<String, Object>();
|
||||
final Set<String> inconsistentAttributes = new HashSet<String>();
|
||||
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
|
||||
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
|
||||
|
||||
GenomeLoc loc = getLocation(genomeLocParser,first);
|
||||
int depth = 0;
|
||||
int maxAC = -1;
|
||||
Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
||||
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
||||
double negLog10PError = -1;
|
||||
VariantContext vcWithMaxAC = null;
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
||||
// counting the number of filtered and variant VCs
|
||||
int nFiltered = 0, nVariant = 0;
|
||||
int nFiltered = 0;
|
||||
|
||||
Allele refAllele = determineReferenceAllele(VCs);
|
||||
boolean remapped = false;
|
||||
|
||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
|
|
@ -546,7 +529,7 @@ public class VariantContextUtils {
|
|||
loc = getLocation(genomeLocParser,vc); // get the longest location
|
||||
|
||||
nFiltered += vc.isFiltered() ? 1 : 0;
|
||||
nVariant += vc.isVariant() ? 1 : 0;
|
||||
if ( vc.isVariant() ) variantSources.add(vc.getSource());
|
||||
|
||||
AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
|
||||
remapped = remapped || alleleMapping.needsRemapping();
|
||||
|
|
@ -566,8 +549,7 @@ public class VariantContextUtils {
|
|||
//
|
||||
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
|
||||
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
|
||||
if (rsID == null && vc.hasID())
|
||||
rsID = vc.getID();
|
||||
if ( vc.hasID() && ! vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID());
|
||||
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
|
||||
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
|
||||
// lets see if the string contains a , separator
|
||||
|
|
@ -627,17 +609,16 @@ public class VariantContextUtils {
|
|||
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
||||
filters.clear();
|
||||
|
||||
// we care about where the call came from
|
||||
if ( annotateOrigin ) {
|
||||
if ( annotateOrigin ) { // we care about where the call came from
|
||||
String setValue;
|
||||
if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() ) // nothing was unfiltered
|
||||
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
|
||||
setValue = "Intersection";
|
||||
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
||||
setValue = "FilteredInAll";
|
||||
else if ( nVariant == 0 ) // everyone was reference
|
||||
else if ( variantSources.isEmpty() ) // everyone was reference
|
||||
setValue = "ReferenceInAll";
|
||||
else { // we are filtered in some subset
|
||||
List<String> s = new ArrayList<String>();
|
||||
else {
|
||||
LinkedHashSet<String> s = new LinkedHashSet<String>();
|
||||
for ( VariantContext vc : VCs )
|
||||
if ( vc.isVariant() )
|
||||
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
|
||||
|
|
@ -652,8 +633,10 @@ public class VariantContextUtils {
|
|||
|
||||
if ( depth > 0 )
|
||||
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
|
||||
if ( rsID != null )
|
||||
attributes.put(VariantContext.ID_KEY, rsID);
|
||||
|
||||
if ( ! rsIDs.isEmpty() ) {
|
||||
attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs));
|
||||
}
|
||||
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
|
||||
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
||||
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
||||
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||
|
||||
private FeatureManager manager;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
|
||||
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
|
||||
return FMTest.getTests(FMTest.class);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,8 +96,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
||||
|
||||
@Test
|
||||
public void combineDBSNPDuplicateSites() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||
1,
|
||||
Arrays.asList(""));
|
||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBAQQualRange() {
|
||||
BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
|
||||
final byte ref = (byte)'A';
|
||||
final byte alt = (byte)'A';
|
||||
|
||||
for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ )
|
||||
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
|
||||
}
|
||||
|
||||
|
||||
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
|
||||
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
|
||||
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue