Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c9111bb23e
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -73,4 +73,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8d092b25f40456e618eef91fdce8adf0"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c29e61810c056b52a47baae0696931ea"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -198,23 +197,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
}
|
||||
|
||||
return addMissingSamples(results, allSamples);
|
||||
}
|
||||
|
||||
private List<VariantCallContext> addMissingSamples(final List<VariantCallContext> calls, final Set<String> allSamples) {
|
||||
if ( calls.isEmpty() || allSamples == null ) return calls;
|
||||
|
||||
final List<VariantCallContext> withAllSamples = new ArrayList<VariantCallContext>(calls.size());
|
||||
for ( final VariantCallContext call : calls ) {
|
||||
if ( call == null )
|
||||
withAllSamples.add(null);
|
||||
else {
|
||||
final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples);
|
||||
withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit));
|
||||
}
|
||||
}
|
||||
|
||||
return withAllSamples;
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -313,7 +313,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
if ( minimalVCF )
|
||||
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
||||
vcfWriter.add(VariantContextUtils.addMissingSamples(builder.make(), samples));
|
||||
vcfWriter.add(builder.make());
|
||||
}
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -325,6 +326,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
||||
private int maxIndelSize = Integer.MAX_VALUE;
|
||||
|
||||
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
|
||||
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
|
||||
|
||||
|
||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||
private static class RandomVariantStructure {
|
||||
|
|
@ -386,10 +390,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
||||
// first, add any requested samples
|
||||
samples.addAll(samplesFromFile);
|
||||
samples.addAll(samplesFromExpressions);
|
||||
// first, check overlap between requested and present samples
|
||||
Set<String> commandLineUniqueSamples = new HashSet<String>(samplesFromFile.size()+samplesFromExpressions.size()+sampleNames.size());
|
||||
commandLineUniqueSamples.addAll(samplesFromFile);
|
||||
commandLineUniqueSamples.addAll(samplesFromExpressions);
|
||||
commandLineUniqueSamples.addAll(sampleNames);
|
||||
commandLineUniqueSamples.removeAll(vcfSamples);
|
||||
|
||||
// second, add the requested samples
|
||||
samples.addAll(sampleNames);
|
||||
samples.addAll(samplesFromExpressions);
|
||||
samples.addAll(samplesFromFile);
|
||||
|
||||
logger.debug(Utils.join(",",commandLineUniqueSamples));
|
||||
|
||||
if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) {
|
||||
logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored.");
|
||||
samples.removeAll(commandLineUniqueSamples);
|
||||
} else if (commandLineUniqueSamples.size() > 0 ) {
|
||||
throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s",
|
||||
"Samples entered on command line (through -sf or -sn) that are not present in the VCF.",
|
||||
"A list of these samples:",
|
||||
Utils.join(",",commandLineUniqueSamples),
|
||||
"To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES"));
|
||||
}
|
||||
|
||||
|
||||
// if none were requested, we want all of them
|
||||
if ( samples.isEmpty() ) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -15,6 +17,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
@ -278,7 +281,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getFlippedEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
if ( ! checkGQIsGood(g) ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
@ -293,6 +296,16 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
return (byte) (b << (2*offset));
|
||||
}
|
||||
|
||||
private boolean checkGQIsGood(Genotype genotype) {
|
||||
if ( genotype.hasGQ() ) {
|
||||
return genotype.getGQ() >= minGenotypeQuality;
|
||||
} else if ( genotype.hasLikelihoods() ) {
|
||||
return GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()) >= minGenotypeQuality;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static String getID(VariantContext v) {
|
||||
if ( v.hasID() ) {
|
||||
return v.getID();
|
||||
|
|
|
|||
|
|
@ -246,7 +246,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
vc = VariantContextUtils.addMissingSamples(vc, samples);
|
||||
vcfwriter.add(vc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
case UNBOUNDED: return -1;
|
||||
case A: return vc.getNAlleles() - 1;
|
||||
case G:
|
||||
final int ploidy = vc.getMaxPloidy();
|
||||
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy);
|
||||
final int ploidy = vc.getMaxPloidy(2);
|
||||
return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy);
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown count type: " + countType);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ import java.util.*;
|
|||
*/
|
||||
@Invariant({"alleles != null"})
|
||||
public final class GenotypeBuilder {
|
||||
private static final List<Allele> HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL);
|
||||
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
private String sampleName = null;
|
||||
private List<Allele> alleles = Collections.emptyList();
|
||||
|
||||
|
|
@ -90,6 +93,23 @@ public final class GenotypeBuilder {
|
|||
return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Genotype object for a sample that's missing from the VC (i.e., in
|
||||
* the output header). Defaults to a diploid no call genotype ./.
|
||||
*
|
||||
* @param sampleName the name of this sample
|
||||
* @return an initialized Genotype with sampleName that's a diploid ./. no call genotype
|
||||
*/
|
||||
public static Genotype createMissing(final String sampleName, final int ploidy) {
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(sampleName);
|
||||
switch ( ploidy ) {
|
||||
case 1: builder.alleles(HAPLOID_NO_CALL); break;
|
||||
case 2: builder.alleles(DIPLOID_NO_CALL); break;
|
||||
default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break;
|
||||
}
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a empty builder. Both a sampleName and alleles must be provided
|
||||
* before trying to make a Genotype from this builder.
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
|
||||
|
|
@ -167,10 +168,36 @@ public class GenotypeLikelihoods {
|
|||
|
||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
|
||||
/**
|
||||
* This is really dangerous and returns completely wrong results for genotypes from a multi-allelic context.
|
||||
* Use getLog10GQ(Genotype,VariantContext) or getLog10GQ(Genotype,List<Allele>) in place of it.
|
||||
*
|
||||
* If you **know** you're biallelic, use getGQLog10FromLikelihoods directly.
|
||||
* @param genotype - actually a genotype type (no call, hom ref, het, hom var)
|
||||
* @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best).
|
||||
*/
|
||||
@Deprecated
|
||||
public double getLog10GQ(GenotypeType genotype){
|
||||
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
}
|
||||
|
||||
@Requires({"genotypeAlleles != null","genotypeAlleles.size()==2","contextAlleles != null","contextAlleles.size() >= 1"})
|
||||
private double getLog10GQ(List<Allele> genotypeAlleles,List<Allele> contextAlleles) {
|
||||
int allele1Index = contextAlleles.indexOf(genotypeAlleles.get(0));
|
||||
int allele2Index = contextAlleles.indexOf(genotypeAlleles.get(1));
|
||||
int plIndex = calculatePLindex(allele1Index,allele2Index);
|
||||
return getGQLog10FromLikelihoods(plIndex,getAsVector());
|
||||
}
|
||||
|
||||
public double getLog10GQ(Genotype genotype, List<Allele> vcAlleles ) {
|
||||
return getLog10GQ(genotype.getAlleles(),vcAlleles);
|
||||
}
|
||||
|
||||
public double getLog10GQ(Genotype genotype, VariantContext context) {
|
||||
return getLog10GQ(genotype,context.getAlleles());
|
||||
}
|
||||
|
||||
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
if(likelihoods == null)
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -413,14 +412,26 @@ public class GenotypesContext implements List<Genotype> {
|
|||
return getGenotypes().get(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present
|
||||
*
|
||||
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getMaxPloidy() {
|
||||
public int getMaxPloidy(final int defaultPloidy) {
|
||||
if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0");
|
||||
|
||||
if ( maxPloidy == -1 ) {
|
||||
maxPloidy = 0; // necessary in the case where there are no genotypes
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
||||
}
|
||||
|
||||
// everything is no called so we return the default ploidy
|
||||
if ( maxPloidy == 0 ) maxPloidy = defaultPloidy;
|
||||
}
|
||||
|
||||
return maxPloidy;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes
|
||||
* Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
|
||||
*
|
||||
* This function is caching, so it's only expensive on the first call
|
||||
*
|
||||
* @return -1, or the max ploidy
|
||||
* @param defaultPloidy the default ploidy, if all samples are no-called
|
||||
* @return default, or the max ploidy
|
||||
*/
|
||||
public int getMaxPloidy() {
|
||||
return genotypes.getMaxPloidy();
|
||||
public int getMaxPloidy(final int defaultPloidy) {
|
||||
return genotypes.getMaxPloidy(defaultPloidy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ import org.apache.log4j.Logger;
|
|||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -47,7 +47,6 @@ public class VariantContextUtils {
|
|||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
|
||||
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
|
||||
|
||||
final public static JexlEngine engine = new JexlEngine();
|
||||
|
|
@ -60,31 +59,6 @@ public class VariantContextUtils {
|
|||
engine.setDebug(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
|
||||
* the resulting VC with default diploid ./. genotypes
|
||||
*
|
||||
* @param vc the VariantContext
|
||||
* @param allSamples all of the samples needed
|
||||
* @return a new VariantContext with missing samples added
|
||||
*/
|
||||
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
|
||||
// TODO -- what's the fastest way to do this calculation?
|
||||
final Set<String> missingSamples = new HashSet<String>(allSamples);
|
||||
missingSamples.removeAll(vc.getSampleNames());
|
||||
|
||||
if ( missingSamples.isEmpty() )
|
||||
return vc;
|
||||
else {
|
||||
//logger.warn("Adding " + missingSamples.size() + " missing samples to called context");
|
||||
final GenotypesContext gc = GenotypesContext.copy(vc.getGenotypes());
|
||||
for ( final String missing : missingSamples ) {
|
||||
gc.add(new GenotypeBuilder(missing).alleles(DIPLOID_NO_CALL).make());
|
||||
}
|
||||
return new VariantContextBuilder(vc).genotypes(gc).make();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the attributes of the attributes map given the VariantContext to reflect the
|
||||
* proper chromosome-based VCF tags
|
||||
|
|
|
|||
|
|
@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter {
|
|||
|
||||
encodingType = BCF2Type.INT8;
|
||||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
|
||||
// deal with the case where we have no call everywhere, in which case we write out diploid
|
||||
if ( nValuesPerGenotype == -1 )
|
||||
nValuesPerGenotype = 2;
|
||||
nValuesPerGenotype = vc.getMaxPloidy(2);
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,10 @@ import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
|||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFContigHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -345,10 +348,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
|
||||
|
||||
assert writer != null;
|
||||
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null ) VCFWriter.missingSampleError(vc, header);
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -339,13 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
final int ploidy = vc.getMaxPloidy(2);
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
Genotype g = vc.getGenotype(sample);
|
||||
if ( g == null ) {
|
||||
missingSampleError(vc, mHeader);
|
||||
}
|
||||
if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
|
||||
|
||||
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String field : genotypeFormatKeys ) {
|
||||
|
|
@ -426,13 +425,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
}
|
||||
}
|
||||
|
||||
public static final void missingSampleError(final VariantContext vc, final VCFHeader header) {
|
||||
final List<String> badSampleNames = new ArrayList<String>();
|
||||
for ( final String x : header.getGenotypeSamples() )
|
||||
if ( ! vc.hasGenotype(x) ) badSampleNames.add(x);
|
||||
throw new ReviewedStingException("BUG: we now require all samples in VCFheader to have genotype objects. Missing samples are " + Utils.join(",", badSampleNames));
|
||||
}
|
||||
|
||||
private boolean isMissingValue(String s) {
|
||||
// we need to deal with the case that it's a list of missing values
|
||||
return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length());
|
||||
|
|
|
|||
|
|
@ -70,6 +70,20 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexSelectionWithNonExistingSamples() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES -sn A -se '[CDH]' -sn Z -sn T -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelectionWithNonExistingSamples--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonExistingFieldSelection() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
@ -98,6 +112,21 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testSampleExclusion--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSampleInclusionWithNonexistingSamples() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -sn A -sn Z -sn Q -sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
UserException.BadInput.class
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testSampleInclusionWithNonexistingSamples--" + testfile, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testConcordance() {
|
||||
|
|
|
|||
|
|
@ -29,12 +29,15 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -44,6 +47,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
double [] v = new double[]{-10.5, -1.25, -5.11};
|
||||
final static String vGLString = "-10.50,-1.25,-5.11";
|
||||
final static String vPLString = "93,0,39";
|
||||
double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC
|
||||
|
||||
@Test
|
||||
public void testFromVector2() {
|
||||
|
|
@ -139,6 +143,28 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
}
|
||||
}
|
||||
|
||||
// this test is completely broken, the method is wrong.
|
||||
public void testGetQualFromLikelihoodsMultiAllelicBroken() {
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||
double actualGQ = gl.getLog10GQ(GenotypeType.HET);
|
||||
double expectedGQ = 1.6;
|
||||
Assert.assertEquals(actualGQ,expectedGQ);
|
||||
}
|
||||
|
||||
public void testGetQualFromLikelihoodsMultiAllelic() {
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||
Allele ref = Allele.create(BaseUtils.A,true);
|
||||
Allele alt1 = Allele.create(BaseUtils.C);
|
||||
Allele alt2 = Allele.create(BaseUtils.T);
|
||||
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
|
||||
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
|
||||
GenotypeBuilder gtBuilder = new GenotypeBuilder();
|
||||
gtBuilder.alleles(gtAlleles);
|
||||
double actualGQ = gl.getLog10GQ(gtBuilder.make(),allAlleles);
|
||||
double expectedGQ = 1.6;
|
||||
Assert.assertEquals(actualGQ,expectedGQ);
|
||||
}
|
||||
|
||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||
Assert.assertEquals(v1.length, v2.length);
|
||||
for ( int i = 0; i < v1.length; i++ ) {
|
||||
|
|
|
|||
|
|
@ -596,6 +596,51 @@ public class VariantContextTestProvider {
|
|||
return TEST_DATAs;
|
||||
}
|
||||
|
||||
public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
||||
final int nSamples = data.header.getNGenotypeSamples();
|
||||
if ( nSamples > 2 ) {
|
||||
for ( final VariantContext vc : data.vcs )
|
||||
if ( vc.isSymbolic() )
|
||||
// cannot handle symbolic alleles because they may be weird non-call VCFs
|
||||
return;
|
||||
|
||||
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
|
||||
tmpFile.deleteOnExit();
|
||||
|
||||
// write expected to disk
|
||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||
|
||||
final Set<String> samplesInVCF = new HashSet<String>(data.header.getGenotypeSamples());
|
||||
final List<String> missingSamples = Arrays.asList("MISSING1", "MISSING2");
|
||||
final List<String> allSamples = new ArrayList<String>(missingSamples);
|
||||
allSamples.addAll(samplesInVCF);
|
||||
|
||||
final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples);
|
||||
writeVCsToFile(writer, header, data.vcs);
|
||||
|
||||
// ensure writing of expected == actual
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getSecond();
|
||||
|
||||
int i = 0;
|
||||
for ( final VariantContext readVC : actual ) {
|
||||
if ( readVC == null ) continue; // sometimes we read null records...
|
||||
final VariantContext expected = data.vcs.get(i++);
|
||||
for ( final Genotype g : readVC.getGenotypes() ) {
|
||||
Assert.assertTrue(allSamples.contains(g.getSampleName()));
|
||||
if ( samplesInVCF.contains(g.getSampleName()) ) {
|
||||
assertEquals(g, expected.getGenotype(g.getSampleName()));
|
||||
} else {
|
||||
// missing
|
||||
Assert.assertTrue(g.isNoCall());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
||||
testReaderWriter(tester, data.header, data.vcs, data.vcs, true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,11 @@ public class VariantContextWritersUnitTest extends BaseTest {
|
|||
VariantContextTestProvider.testReaderWriter(new BCFIOTester(), testData);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "VariantContextTest_SingleContexts")
|
||||
public void testBCF2WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
|
||||
VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new BCFIOTester(), testData);
|
||||
}
|
||||
|
||||
private class BCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
|
||||
@Override
|
||||
public String getExtension() {
|
||||
|
|
@ -110,6 +115,11 @@ public class VariantContextWritersUnitTest extends BaseTest {
|
|||
VariantContextTestProvider.testReaderWriter(new VCFIOTester(), testData);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "VariantContextTest_SingleContexts")
|
||||
public void testVCF4WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
|
||||
VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new VCFIOTester(), testData);
|
||||
}
|
||||
|
||||
private class VCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
|
||||
@Override
|
||||
public String getExtension() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue