Merge branch 'master' into rodRefactor
This commit is contained in:
commit
f8a56bc64b
|
|
@ -557,6 +557,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
|
VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());
|
||||||
|
|
||||||
|
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||||
|
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||||
|
sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes()));
|
||||||
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
|
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
|
|
|
||||||
|
|
@ -147,13 +147,13 @@ public class MathUtils {
|
||||||
return Math.log10(sum) + maxValue;
|
return Math.log10(sum) + maxValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double sum(List<Double> values) {
|
public static double sumDoubles(List<Double> values) {
|
||||||
double s = 0.0;
|
double s = 0.0;
|
||||||
for ( double v : values) s += v;
|
for ( double v : values) s += v;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int sum(List<Integer> values) {
|
public static int sumIntegers(List<Integer> values) {
|
||||||
int s = 0;
|
int s = 0;
|
||||||
for ( int v : values) s += v;
|
for ( int v : values) s += v;
|
||||||
return s;
|
return s;
|
||||||
|
|
@ -428,7 +428,7 @@ public class MathUtils {
|
||||||
|
|
||||||
// for precision purposes, we need to add (or really subtract, since they're
|
// for precision purposes, we need to add (or really subtract, since they're
|
||||||
// all negative) the largest value; also, we need to convert to normal-space.
|
// all negative) the largest value; also, we need to convert to normal-space.
|
||||||
double maxValue = MathUtils.arrayMax( array );
|
double maxValue = MathUtils.arrayMaxDouble( array );
|
||||||
for (int i = 0; i < array.size(); i++)
|
for (int i = 0; i < array.size(); i++)
|
||||||
normalized[i] = Math.pow(10, array.get(i) - maxValue);
|
normalized[i] = Math.pow(10, array.get(i) - maxValue);
|
||||||
|
|
||||||
|
|
@ -507,7 +507,7 @@ public class MathUtils {
|
||||||
return minI;
|
return minI;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int arrayMax(List<Integer> array) {
|
public static int arrayMaxInt(List<Integer> array) {
|
||||||
if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
|
if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
|
||||||
if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
|
if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
|
||||||
|
|
||||||
|
|
@ -516,7 +516,7 @@ public class MathUtils {
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static double arrayMax(List<Double> array) {
|
public static double arrayMaxDouble(List<Double> array) {
|
||||||
if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
|
if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
|
||||||
if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
|
if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!");
|
||||||
|
|
||||||
|
|
@ -1274,5 +1274,4 @@ public class MathUtils {
|
||||||
public static double log10Factorial (int x) {
|
public static double log10Factorial (int x) {
|
||||||
return log10Gamma(x+1);
|
return log10Gamma(x+1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,13 @@ public class Genotype {
|
||||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns a new Genotype equal to {@code g} except that the likelihood attributes are dropped.
 *
 * The attribute map of {@code g} is copied into a fresh HashMap, the entries keyed by
 * VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY and VCFConstants.GENOTYPE_LIKELIHOODS_KEY are
 * removed from the copy, and a new Genotype is constructed from it. Sample name, alleles,
 * negLog10PError and phasing are taken from {@code g}; filters are passed through only when
 * {@code g.filtersWereApplied()} is true, otherwise null is passed.
 *
 * @param g the genotype to copy
 * @return a new Genotype built from the copied attribute map without the two likelihood keys
 */
public static Genotype removePLs(Genotype g) {
|
||||||

Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||||

attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||||

attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||||

return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||||

}
|
||||||
|
|
||||||
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
||||||
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -588,6 +588,14 @@ public class VariantContextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||||
|
for ( VariantContext vc : VCs ) {
|
||||||
|
if ( vc.alleles.size() != alleles.size() ) {
|
||||||
|
genotypes = stripPLs(genotypes);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// take the VC with the maxAC and pull the attributes into a modifiable map
|
// take the VC with the maxAC and pull the attributes into a modifiable map
|
||||||
if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
|
if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
|
||||||
attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
|
attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
|
||||||
|
|
@ -633,6 +641,16 @@ public class VariantContextUtils {
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a new map with the same keys as {@code genotypes} in which every genotype that
 * reports {@code hasLikelihoods()} is replaced by the result of {@code Genotype.removePLs}.
 * Genotypes without likelihoods are put into the new map unchanged (same object).
 *
 * The result is a plain HashMap, so the iteration order of the input map is not carried over.
 *
 * @param genotypes map from key to genotype (presumably keyed by sample name -- confirm against callers)
 * @return a newly allocated map; the input map itself is only read
 */
public static Map<String, Genotype> stripPLs(Map<String, Genotype> genotypes) {
|
||||||

Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
|
||||||


||||||

for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
|
||||||

newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue());
|
||||||

}
|
||||||


||||||

return newGs;
|
||||||

}
|
||||||
|
|
||||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( VariantContext vc : VCs ) {
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test helper: runs CombineVariants on two VCF inputs bound as v1 and v2 (with
 * "-priority v1,v2" and -NO_HEADER) against b36KGReference, then hands the spec to executeTest.
 *
 * @param file1 file name appended to validationDataLocation and bound as v1
 * @param file2 file name appended to validationDataLocation and bound as v2
 * @param md5   single expected value passed to WalkerTestSpec (presumably the MD5 of the output -- confirm)
 */
public void combinePLs(String file1, String file2, String md5) {
|
||||||

WalkerTestSpec spec = new WalkerTestSpec(
|
||||||

"-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -B:v1,VCF " + validationDataLocation + file1 + " -B:v2,VCF " + validationDataLocation + file2,
|
||||||

1,
|
||||||

Arrays.asList(md5));
|
||||||

executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||||

}
|
||||||
|
|
||||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
|
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
|
||||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
|
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
|
||||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
|
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
|
||||||
|
|
@ -78,6 +86,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
|
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
|
||||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
|
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
|
||||||
|
|
||||||
|
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
|
||||||
|
|
|
||||||
|
|
@ -63,4 +63,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testConcordance--" + testFile, spec);
|
executeTest("testConcordance--" + testFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs SelectVariants with "-sn NA12892" on combine.3.vcf (from validationDataLocation)
 * with -NO_HEADER and hands the spec to executeTest.
 *
 * NOTE(review): the test is disabled via enabled=false and the expected-value list holds
 * only an empty string, so it currently asserts nothing. Supply the real expected value
 * before enabling it.
 */
@Test(enabled=false)
|
||||||

public void testRemovePLs() {
|
||||||

String testFile = validationDataLocation + "combine.3.vcf";
|
||||||


||||||

WalkerTestSpec spec = new WalkerTestSpec(
|
||||||

"-T SelectVariants -R " + b36KGReference + " -sn NA12892 -B:variant,VCF " + testFile + " -o %s -NO_HEADER",
|
||||||

1,
|
||||||

Arrays.asList("")
|
||||||

);
|
||||||


||||||

executeTest("testWithPLs--" + testFile, spec);
|
||||||

}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
|
var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar")
|
||||||
|
|
||||||
@Argument(doc="liftOverPerl", required=false)
|
@Argument(doc="liftOverPerl", required=false)
|
||||||
var liftOverPerl: File = new File("./perl/liftOverVCF.pl")
|
var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl")
|
||||||
|
|
||||||
@Argument(shortName = "ver", doc="The SVN version of this release", required=true)
|
@Argument(shortName = "ver", doc="The SVN version of this release", required=true)
|
||||||
var VERSION: String = _
|
var VERSION: String = _
|
||||||
|
|
@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript {
|
||||||
//Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
|
//Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name)
|
||||||
(inRef.name, outRef.name) match {
|
(inRef.name, outRef.name) match {
|
||||||
case ("b37", "hg19") =>
|
case ("b37", "hg19") =>
|
||||||
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef)
|
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef)
|
||||||
case ("b37", "hg18") =>
|
case ("b37", "hg18") =>
|
||||||
return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef)
|
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef)
|
||||||
case ("b37", "b36") =>
|
case ("b37", "b36") =>
|
||||||
return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef)
|
return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef)
|
||||||
case _ => return null
|
case _ => return null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
//
|
//
|
||||||
b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"))
|
b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"))
|
||||||
hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta"))
|
hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta"))
|
||||||
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
|
exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
|
||||||
refs = List(b37, hg18, exampleFASTA)
|
refs = List(b37, hg18, exampleFASTA)
|
||||||
|
|
||||||
val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/"
|
val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/"
|
||||||
|
|
@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
|
addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false))
|
||||||
|
|
||||||
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
||||||
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
||||||
}
|
}
|
||||||
|
|
||||||
def initializeStandardDataFiles() = {
|
def initializeStandardDataFiles() = {
|
||||||
|
|
@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
|
b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta"))
|
||||||
hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
|
hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"))
|
||||||
b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
|
b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta"))
|
||||||
exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta"))
|
exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta"))
|
||||||
refs = List(hg19, b37, hg18, b36, exampleFASTA)
|
refs = List(hg19, b37, hg18, b36, exampleFASTA)
|
||||||
|
|
||||||
addResource(new Resource(b37.file, "", b37, false))
|
addResource(new Resource(b37.file, "", b37, false))
|
||||||
|
|
@ -155,8 +155,8 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt",
|
||||||
"refGene", b37, true, false))
|
"refGene", b37, true, false))
|
||||||
|
|
||||||
addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false))
|
addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false))
|
||||||
addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false))
|
addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false))
|
||||||
|
|
||||||
// todo -- chain files?
|
// todo -- chain files?
|
||||||
// todo 1000G SNP and indel call sets?
|
// todo 1000G SNP and indel call sets?
|
||||||
|
|
@ -165,7 +165,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
// exampleFASTA file
|
// exampleFASTA file
|
||||||
//
|
//
|
||||||
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false))
|
||||||
addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false))
|
||||||
}
|
}
|
||||||
|
|
||||||
def createBundleDirectories(dir: File) = {
|
def createBundleDirectories(dir: File) = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue