Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
faad2972d6
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -325,6 +326,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
||||
private int maxIndelSize = Integer.MAX_VALUE;
|
||||
|
||||
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
|
||||
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
|
||||
|
||||
|
||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||
private static class RandomVariantStructure {
|
||||
|
|
@ -386,10 +390,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
||||
// first, add any requested samples
|
||||
samples.addAll(samplesFromFile);
|
||||
samples.addAll(samplesFromExpressions);
|
||||
// first, check overlap between requested and present samples
|
||||
Set<String> commandLineUniqueSamples = new HashSet<String>(samplesFromFile.size()+samplesFromExpressions.size()+sampleNames.size());
|
||||
commandLineUniqueSamples.addAll(samplesFromFile);
|
||||
commandLineUniqueSamples.addAll(samplesFromExpressions);
|
||||
commandLineUniqueSamples.addAll(sampleNames);
|
||||
commandLineUniqueSamples.removeAll(vcfSamples);
|
||||
|
||||
// second, add the requested samples
|
||||
samples.addAll(sampleNames);
|
||||
samples.addAll(samplesFromExpressions);
|
||||
samples.addAll(samplesFromFile);
|
||||
|
||||
logger.debug(Utils.join(",",commandLineUniqueSamples));
|
||||
|
||||
if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) {
|
||||
logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored.");
|
||||
samples.removeAll(commandLineUniqueSamples);
|
||||
} else if (commandLineUniqueSamples.size() > 0 ) {
|
||||
throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s",
|
||||
"Samples entered on command line (through -sf or -sn) that are not present in the VCF.",
|
||||
"A list of these samples:",
|
||||
Utils.join(",",commandLineUniqueSamples),
|
||||
"To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES"));
|
||||
}
|
||||
|
||||
|
||||
// if none were requested, we want all of them
|
||||
if ( samples.isEmpty() ) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -15,6 +17,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
@ -278,7 +281,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getFlippedEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
if ( ! checkGQIsGood(g) ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
@ -293,6 +296,16 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
return (byte) (b << (2*offset));
|
||||
}
|
||||
|
||||
private boolean checkGQIsGood(Genotype genotype) {
|
||||
if ( genotype.hasGQ() ) {
|
||||
return genotype.getGQ() >= minGenotypeQuality;
|
||||
} else if ( genotype.hasLikelihoods() ) {
|
||||
return GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()) >= minGenotypeQuality;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static String getID(VariantContext v) {
|
||||
if ( v.hasID() ) {
|
||||
return v.getID();
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
|
||||
|
|
@ -167,10 +168,36 @@ public class GenotypeLikelihoods {
|
|||
|
||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
|
||||
/**
|
||||
* This is really dangerous and returns completely wrong results for genotypes from a multi-allelic context.
|
||||
* Use getLog10GQ(Genotype,VariantContext) or getLog10GQ(Genotype,List<Allele>) in place of it.
|
||||
*
|
||||
* If you **know** you're biallelic, use getGQLog10FromLikelihoods directly.
|
||||
* @param genotype - actually a genotype type (no call, hom ref, het, hom var)
|
||||
* @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best).
|
||||
*/
|
||||
@Deprecated
|
||||
public double getLog10GQ(GenotypeType genotype){
|
||||
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
}
|
||||
|
||||
@Requires({"genotypeAlleles != null","genotypeAlleles.size()==2","contextAlleles != null","contextAlleles.size() >= 1"})
|
||||
private double getLog10GQ(List<Allele> genotypeAlleles,List<Allele> contextAlleles) {
|
||||
int allele1Index = contextAlleles.indexOf(genotypeAlleles.get(0));
|
||||
int allele2Index = contextAlleles.indexOf(genotypeAlleles.get(1));
|
||||
int plIndex = calculatePLindex(allele1Index,allele2Index);
|
||||
return getGQLog10FromLikelihoods(plIndex,getAsVector());
|
||||
}
|
||||
|
||||
public double getLog10GQ(Genotype genotype, List<Allele> vcAlleles ) {
|
||||
return getLog10GQ(genotype.getAlleles(),vcAlleles);
|
||||
}
|
||||
|
||||
public double getLog10GQ(Genotype genotype, VariantContext context) {
|
||||
return getLog10GQ(genotype,context.getAlleles());
|
||||
}
|
||||
|
||||
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
if(likelihoods == null)
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
|
|
|
|||
|
|
@ -70,6 +70,20 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexSelectionWithNonExistingSamples() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES -sn A -se '[CDH]' -sn Z -sn T -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelectionWithNonExistingSamples--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonExistingFieldSelection() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
@ -98,6 +112,21 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testSampleExclusion--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSampleInclusionWithNonexistingSamples() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -sn A -sn Z -sn Q -sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
UserException.BadInput.class
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testSampleInclusionWithNonexistingSamples--" + testfile, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testConcordance() {
|
||||
|
|
|
|||
|
|
@ -29,12 +29,15 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -44,6 +47,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
double [] v = new double[]{-10.5, -1.25, -5.11};
|
||||
final static String vGLString = "-10.50,-1.25,-5.11";
|
||||
final static String vPLString = "93,0,39";
|
||||
double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC
|
||||
|
||||
@Test
|
||||
public void testFromVector2() {
|
||||
|
|
@ -139,6 +143,28 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
}
|
||||
}
|
||||
|
||||
// this test is completely broken, the method is wrong.
|
||||
public void testGetQualFromLikelihoodsMultiAllelicBroken() {
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||
double actualGQ = gl.getLog10GQ(GenotypeType.HET);
|
||||
double expectedGQ = 1.6;
|
||||
Assert.assertEquals(actualGQ,expectedGQ);
|
||||
}
|
||||
|
||||
public void testGetQualFromLikelihoodsMultiAllelic() {
|
||||
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||
Allele ref = Allele.create(BaseUtils.A,true);
|
||||
Allele alt1 = Allele.create(BaseUtils.C);
|
||||
Allele alt2 = Allele.create(BaseUtils.T);
|
||||
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
|
||||
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
|
||||
GenotypeBuilder gtBuilder = new GenotypeBuilder();
|
||||
gtBuilder.alleles(gtAlleles);
|
||||
double actualGQ = gl.getLog10GQ(gtBuilder.make(),allAlleles);
|
||||
double expectedGQ = 1.6;
|
||||
Assert.assertEquals(actualGQ,expectedGQ);
|
||||
}
|
||||
|
||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||
Assert.assertEquals(v1.length, v2.length);
|
||||
for ( int i = 0; i < v1.length; i++ ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue