Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
faad2972d6
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
|
|
@ -325,6 +326,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
||||||
private int maxIndelSize = Integer.MAX_VALUE;
|
private int maxIndelSize = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
|
||||||
|
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
|
||||||
|
|
||||||
|
|
||||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||||
private static class RandomVariantStructure {
|
private static class RandomVariantStructure {
|
||||||
|
|
@ -386,10 +390,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||||
|
|
||||||
// first, add any requested samples
|
// first, check overlap between requested and present samples
|
||||||
samples.addAll(samplesFromFile);
|
Set<String> commandLineUniqueSamples = new HashSet<String>(samplesFromFile.size()+samplesFromExpressions.size()+sampleNames.size());
|
||||||
samples.addAll(samplesFromExpressions);
|
commandLineUniqueSamples.addAll(samplesFromFile);
|
||||||
|
commandLineUniqueSamples.addAll(samplesFromExpressions);
|
||||||
|
commandLineUniqueSamples.addAll(sampleNames);
|
||||||
|
commandLineUniqueSamples.removeAll(vcfSamples);
|
||||||
|
|
||||||
|
// second, add the requested samples
|
||||||
samples.addAll(sampleNames);
|
samples.addAll(sampleNames);
|
||||||
|
samples.addAll(samplesFromExpressions);
|
||||||
|
samples.addAll(samplesFromFile);
|
||||||
|
|
||||||
|
logger.debug(Utils.join(",",commandLineUniqueSamples));
|
||||||
|
|
||||||
|
if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) {
|
||||||
|
logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored.");
|
||||||
|
samples.removeAll(commandLineUniqueSamples);
|
||||||
|
} else if (commandLineUniqueSamples.size() > 0 ) {
|
||||||
|
throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s",
|
||||||
|
"Samples entered on command line (through -sf or -sn) that are not present in the VCF.",
|
||||||
|
"A list of these samples:",
|
||||||
|
Utils.join(",",commandLineUniqueSamples),
|
||||||
|
"To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// if none were requested, we want all of them
|
// if none were requested, we want all of them
|
||||||
if ( samples.isEmpty() ) {
|
if ( samples.isEmpty() ) {
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -15,6 +17,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
|
@ -278,7 +281,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
|
|
||||||
private byte getFlippedEncoding(Genotype g, int offset) {
|
private byte getFlippedEncoding(Genotype g, int offset) {
|
||||||
byte b;
|
byte b;
|
||||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
if ( ! checkGQIsGood(g) ) {
|
||||||
b = NO_CALL;
|
b = NO_CALL;
|
||||||
} else if ( g.isHomRef() ) {
|
} else if ( g.isHomRef() ) {
|
||||||
b = HOM_VAR;
|
b = HOM_VAR;
|
||||||
|
|
@ -293,6 +296,16 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
||||||
return (byte) (b << (2*offset));
|
return (byte) (b << (2*offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean checkGQIsGood(Genotype genotype) {
|
||||||
|
if ( genotype.hasGQ() ) {
|
||||||
|
return genotype.getGQ() >= minGenotypeQuality;
|
||||||
|
} else if ( genotype.hasLikelihoods() ) {
|
||||||
|
return GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()) >= minGenotypeQuality;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private static String getID(VariantContext v) {
|
private static String getID(VariantContext v) {
|
||||||
if ( v.hasID() ) {
|
if ( v.hasID() ) {
|
||||||
return v.getID();
|
return v.getID();
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class GenotypeLikelihoods {
|
public class GenotypeLikelihoods {
|
||||||
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
|
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
|
||||||
|
|
@ -167,10 +168,36 @@ public class GenotypeLikelihoods {
|
||||||
|
|
||||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is really dangerous and returns completely wrong results for genotypes from a multi-allelic context.
|
||||||
|
* Use getLog10GQ(Genotype,VariantContext) or getLog10GQ(Genotype,List<Allele>) in place of it.
|
||||||
|
*
|
||||||
|
* If you **know** you're biallelic, use getGQLog10FromLikelihoods directly.
|
||||||
|
* @param genotype - actually a genotype type (no call, hom ref, het, hom var)
|
||||||
|
* @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best).
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public double getLog10GQ(GenotypeType genotype){
|
public double getLog10GQ(GenotypeType genotype){
|
||||||
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"genotypeAlleles != null","genotypeAlleles.size()==2","contextAlleles != null","contextAlleles.size() >= 1"})
|
||||||
|
private double getLog10GQ(List<Allele> genotypeAlleles,List<Allele> contextAlleles) {
|
||||||
|
int allele1Index = contextAlleles.indexOf(genotypeAlleles.get(0));
|
||||||
|
int allele2Index = contextAlleles.indexOf(genotypeAlleles.get(1));
|
||||||
|
int plIndex = calculatePLindex(allele1Index,allele2Index);
|
||||||
|
return getGQLog10FromLikelihoods(plIndex,getAsVector());
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getLog10GQ(Genotype genotype, List<Allele> vcAlleles ) {
|
||||||
|
return getLog10GQ(genotype.getAlleles(),vcAlleles);
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getLog10GQ(Genotype genotype, VariantContext context) {
|
||||||
|
return getLog10GQ(genotype,context.getAlleles());
|
||||||
|
}
|
||||||
|
|
||||||
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||||
if(likelihoods == null)
|
if(likelihoods == null)
|
||||||
return Double.NEGATIVE_INFINITY;
|
return Double.NEGATIVE_INFINITY;
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,20 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testComplexSelection--" + testfile, spec);
|
executeTest("testComplexSelection--" + testfile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplexSelectionWithNonExistingSamples() {
|
||||||
|
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||||
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString(" --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES -sn A -se '[CDH]' -sn Z -sn T -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||||
|
1,
|
||||||
|
Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testComplexSelectionWithNonExistingSamples--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNonExistingFieldSelection() {
|
public void testNonExistingFieldSelection() {
|
||||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||||
|
|
@ -98,6 +112,21 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testSampleExclusion--" + testfile, spec);
|
executeTest("testSampleExclusion--" + testfile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSampleInclusionWithNonexistingSamples() {
|
||||||
|
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||||
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -sn A -sn Z -sn Q -sf " + samplesFile + " --variant " + testfile,
|
||||||
|
1,
|
||||||
|
UserException.BadInput.class
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
executeTest("testSampleInclusionWithNonexistingSamples--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testConcordance() {
|
public void testConcordance() {
|
||||||
|
|
|
||||||
|
|
@ -29,12 +29,15 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -44,6 +47,7 @@ public class GenotypeLikelihoodsUnitTest {
|
||||||
double [] v = new double[]{-10.5, -1.25, -5.11};
|
double [] v = new double[]{-10.5, -1.25, -5.11};
|
||||||
final static String vGLString = "-10.50,-1.25,-5.11";
|
final static String vGLString = "-10.50,-1.25,-5.11";
|
||||||
final static String vPLString = "93,0,39";
|
final static String vPLString = "93,0,39";
|
||||||
|
double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFromVector2() {
|
public void testFromVector2() {
|
||||||
|
|
@ -139,6 +143,28 @@ public class GenotypeLikelihoodsUnitTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this test is completely broken, the method is wrong.
|
||||||
|
public void testGetQualFromLikelihoodsMultiAllelicBroken() {
|
||||||
|
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||||
|
double actualGQ = gl.getLog10GQ(GenotypeType.HET);
|
||||||
|
double expectedGQ = 1.6;
|
||||||
|
Assert.assertEquals(actualGQ,expectedGQ);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testGetQualFromLikelihoodsMultiAllelic() {
|
||||||
|
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
|
||||||
|
Allele ref = Allele.create(BaseUtils.A,true);
|
||||||
|
Allele alt1 = Allele.create(BaseUtils.C);
|
||||||
|
Allele alt2 = Allele.create(BaseUtils.T);
|
||||||
|
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
|
||||||
|
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
|
||||||
|
GenotypeBuilder gtBuilder = new GenotypeBuilder();
|
||||||
|
gtBuilder.alleles(gtAlleles);
|
||||||
|
double actualGQ = gl.getLog10GQ(gtBuilder.make(),allAlleles);
|
||||||
|
double expectedGQ = 1.6;
|
||||||
|
Assert.assertEquals(actualGQ,expectedGQ);
|
||||||
|
}
|
||||||
|
|
||||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||||
Assert.assertEquals(v1.length, v2.length);
|
Assert.assertEquals(v1.length, v2.length);
|
||||||
for ( int i = 0; i < v1.length; i++ ) {
|
for ( int i = 0; i < v1.length; i++ ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue