Merge remote-tracking branch 'origin/master' into intel

This commit is contained in:
David Roazen 2014-03-10 14:07:36 -04:00
commit 7c34f05082
20 changed files with 269 additions and 60 deletions

View File

@ -13,7 +13,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-root</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>public/sting-root</relativePath>
</parent>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFConstants;
@ -67,14 +68,18 @@ public class PosteriorLikelihoodsUtils {
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
addAlleleCounts(totalAlleleCounts,resource,useAC);
}
//add the allele counts from the input samples (if applicable)
if ( useInputSamples ) {
addAlleleCounts(totalAlleleCounts,vc1,useAC);
}
//add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
// now extract the counts of the alleles present within vc1, and in order
@ -86,6 +91,7 @@ public class PosteriorLikelihoodsUtils {
totalAlleleCounts.get(allele) : 0 );
}
//parse the likelihoods for each sample's genotype
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
for ( final Genotype genotype : vc1.getGenotypes() ) {
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
@ -196,13 +202,24 @@ public class PosteriorLikelihoodsUtils {
return priors;
}
/**
* Parse counts for each allele
* @param counts - Map to store and return data
* @param context - line to be parsed from the input VCF file
* @param useAC - use allele count annotation value from VariantContext (vs. MLEAC)
*/
private static void addAlleleCounts(final Map<Allele,Integer> counts, final VariantContext context, final boolean useAC) {
final int[] ac;
//use MLEAC value...
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
} else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
} else {
ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context);
}
//...unless specified by the user in useAC or unless MLEAC is absent
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context);
}
//if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation
else {
ac = new int[context.getAlternateAlleles().size()];
int idx = 0;
for ( final Allele allele : context.getAlternateAlleles() ) {
@ -210,24 +227,52 @@ public class PosteriorLikelihoodsUtils {
}
}
//since the allele count for the reference allele is not given in the VCF format,
//calculate it from the allele number minus the total counts for alternate alleles
for ( final Allele allele : context.getAlleles() ) {
final int count;
if ( allele.isReference() ) {
if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
count = context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac);
count = Math.max(context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac),0); //occasionally an MLEAC value will sneak in that's greater than the AN
} else {
count = context.getCalledChrCount() - (int) MathUtils.sum(ac);
count = Math.max(context.getCalledChrCount() - (int) MathUtils.sum(ac),0);
}
} else {
count = ac[context.getAlternateAlleles().indexOf(allele)];
}
//if this allele isn't in the map yet, add it
if ( ! counts.containsKey(allele) ) {
counts.put(allele,0);
}
//add the count for the current allele to the existing value in the map
counts.put(allele,count + counts.get(allele));
}
}
/**
* Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF
* @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC)
* @param context VariantContext from which to extract the data
* @return int[] with allele count data
*/
/**
 * Retrieves allele count data from the VariantContext attribute named by VCFkey
 * (expected to be AC or MLEAC) and validates that the number of values matches
 * the number of alternate alleles in the record.
 *
 * @param VCFkey  VariantContext annotation tag of interest (should be AC or MLEAC)
 * @param context VariantContext from which to extract the data
 * @return int[] with allele count data, one entry per alternate allele
 * @throws UserException if the attribute's cardinality does not match the number of alt alleles
 */
private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) {
    final Object alleleCountsFromVCF = context.getAttribute(VCFkey);
    final int numAltAlleles = context.getAlternateAlleles().size();
    if ( alleleCountsFromVCF instanceof List ) {
        // wildcard bound avoids a raw-type warning; element parsing is delegated to extractInts()
        if ( ((List<?>) alleleCountsFromVCF).size() != numAltAlleles )
            // report the actual key being validated (previously hardcoded "MLE allele counts"
            // even when the AC annotation was the one that was malformed)
            throw new UserException(String.format("Variant does not contain the same number of %s values as alternate alleles for record at %s:%d", VCFkey, context.getChr(), context.getStart()));
    }
    else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer ) {
        // a scalar attribute encodes exactly one count, so there must be exactly one alt allele
        if ( numAltAlleles != 1 )
            throw new UserException(String.format("Variant does not contain the same number of %s values as alternate alleles for record at %s:%d", VCFkey, context.getChr(), context.getStart()));
    }
    return extractInts(alleleCountsFromVCF);
}
/**
* Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
* @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()
* @return - array of ints
*/
public static int[] extractInts(final Object integerListContainingVCField) {
List<Integer> mleList = null;
if ( integerListContainingVCField instanceof List ) {

View File

@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
codec = new VCFCodec();
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics.update(eval,truth);
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
VariantContext eval = contextPair.getFirst();
@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
// set up the table
table[0] = new int[] {30, 12, 7, 5, 6, 0};
@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader);
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader);
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false);
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false);
// test what happens if you put in disjoint sets and start making requests
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
List<Pair<VariantContext,VariantContext>> data = getData7();

View File

@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
* Date: 12/8/13
*/
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.variantcontext.*;
@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
}
/**
 * A resource with allele frequency 0.5 (MLEAC=500, AN=1000) should pull a marginal
 * HOM_VAR call (PLs {40,1,0}) over to HET: afterwards the HOM_VAR posterior value
 * must exceed HET's (assumes GP is phred-scaled like PL, lower = more likely).
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testCalculatePosteriorHOM_VARtoHET() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
    final VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
    final int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
    Assert.assertTrue(GP[2] > GP[1]);
}
/**
 * A resource with allele frequency 0.9 (MLEAC=900, AN=1000) should pull a marginal
 * HET call (PLs {40,0,1}) over to HOM_VAR: afterwards the HOM_VAR posterior value
 * must be below HET's (assumes GP is phred-scaled like PL, lower = more likely).
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testCalculatePosteriorHETtoHOM_VAR() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
    final VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
    final int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
    Assert.assertTrue(GP[2] < GP[1]);
}
/**
 * A resource with allele frequency 0.5 (MLEAC=500, AN=1000) should pull a marginal
 * HOM_REF call (PLs {0,1,40}) over to HET: afterwards the HOM_REF posterior value
 * must exceed HET's (assumes GP is phred-scaled like PL, lower = more likely).
 *
 * NOTE(review): the genotype alleles passed to makeG are T/T while the PLs favor
 * HOM_REF — presumably only the PLs matter to the posterior computation; confirm
 * makeG semantics.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testCalculatePosteriorHOM_REFtoHET() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
    final VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
    final int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
    Assert.assertTrue(GP[0] > GP[1]);
}
/**
 * A resource with allele frequency 0.1 (MLEAC=100, AN=1000) should pull a marginal
 * HET call (PLs {1,0,40}) over to HOM_REF: afterwards the HOM_REF posterior value
 * must be below HET's (assumes GP is phred-scaled like PL, lower = more likely).
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testCalculatePosteriorHETtoHOM_REF() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
    final VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
    final int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
    Assert.assertTrue(GP[0] < GP[1]);
}
/**
 * Smoke test: a resource whose MLEAC (11) exceeds its AN (10) must not cause the
 * posterior computation to fail — the reference count is clamped at zero rather
 * than going negative. No assertion; passing means no exception was thrown.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testMLEACgreaterThanAN() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    // result intentionally unused: the test only verifies that no exception is thrown
    PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
/**
 * A resource with two alternate alleles but a scalar AC value (5) must be rejected:
 * the AC cardinality does not match the number of alt alleles, so the posterior
 * computation is expected to throw a UserException.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test (expectedExceptions = {UserException.class})
public void testWrongNumberACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    // expected to throw before returning
    PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
/**
 * A resource with two alternate alleles but a scalar MLEAC value (5) must be
 * rejected: the MLEAC cardinality does not match the number of alt alleles, so
 * the posterior computation is expected to throw a UserException.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test (expectedExceptions = {UserException.class})
public void testWrongNumberMLEACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    // expected to throw before returning
    PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
/**
 * Smoke test: a resource with two alternate alleles and a matching two-element AC
 * list (5, 4) must be accepted. No assertion; passing means no exception was thrown.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testMultipleACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    // result intentionally unused: the test only verifies that no exception is thrown
    PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
/**
 * Smoke test: a resource with two alternate alleles and a matching two-element
 * MLEAC list (5, 4) must be accepted. No assertion; passing means no exception
 * was thrown.
 *
 * FIX: was declared private — TestNG only runs public @Test methods, so this
 * test was silently skipped; made public so it actually executes.
 */
@Test
public void testMultipleMLEACvalues() {
    final VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
            makeG("s2",Aref,T,18,0,24),
            makeG("s3",Aref,T,22,0,12));
    final List<VariantContext> supplTest1 = new ArrayList<>(1);
    supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
    // result intentionally unused: the test only verifies that no exception is thrown
    PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
private double[] pl2gl(int[] pl) {
double[] gl = new double[pl.length];
for ( int idx = 0; idx < gl.length; idx++ ) {

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-root</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../public/sting-root</relativePath>
</parent>

View File

@ -9,7 +9,7 @@
<name>GATK External Example</name>
<properties>
<sting.version>2.8-SNAPSHOT</sting.version>
<sting.version>3.1-SNAPSHOT</sting.version>
<!--
sting.basedir property must point to your checkout of Sting/GATK until we can get all the
dependencies out of the committed sting repo and into central.

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -42,8 +42,9 @@ public class ConcordanceMetrics {
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
private GenotypeConcordanceTable overallGenotypeConcordance;
private SiteConcordanceTable overallSiteConcordance;
private boolean printInterestingSites;
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) {
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) {
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
overlappingSamples.retainAll(truth.getGenotypeSamples());
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
@ -52,6 +53,7 @@ public class ConcordanceMetrics {
}
overallGenotypeConcordance = new GenotypeConcordanceTable();
overallSiteConcordance = new SiteConcordanceTable();
printInterestingSites = printSitesEnabled;
}
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
@ -114,6 +116,7 @@ public class ConcordanceMetrics {
@Requires({"eval != null","truth != null"})
public void update(VariantContext eval, VariantContext truth) {
boolean doPrint = false;
overallSiteConcordance.update(eval,truth);
Set<String> alleleTruth = new HashSet<String>(8);
String truthRef = truth.getReference().getBaseString();
@ -130,7 +133,12 @@ public class ConcordanceMetrics {
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
}
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
if(printInterestingSites && doPrint)
System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType());
//Below is code to print out mismatched alternate alleles
//System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles());
}
}
@ -212,13 +220,14 @@ public class ConcordanceMetrics {
}
@Requires({"eval!=null","truth != null","truthAlleles != null"})
public void update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
public Boolean update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
// this is slow but correct.
// NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles
// that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will
// all match, so long as A and T are alleles in the truth callset.
boolean matchingAlt = true;
int evalGT, truthGT;
if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) {
// by default, no-calls "match" between alleles, so if
// one or both sites are no-call or unavailable, the alt alleles match
@ -241,10 +250,17 @@ public class ConcordanceMetrics {
}
if ( matchingAlt ) {
genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++;
evalGT = eval.getType().ordinal();
truthGT = truth.getType().ordinal();
genotypeCounts[evalGT][truthGT]++;
if(evalGT != truthGT) //report variants where genotypes don't match
return true;
} else {
nMismatchingAlt++;
return false;
//return true; //alternatively, report variants where alt alleles don't match
}
return false;
}
public int[][] getTable() {

View File

@ -25,10 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -213,6 +210,16 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
@Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output")
public boolean moltenize = false;
/**
* Print sites where genotypes are mismatched between callsets along with annotations giving the genotype of each callset
* Outputs directly to System.out. Super classy.
*
* NOTE: doesn't currently differentiate between samples, so there may be repeats
*/
@Hidden
@Argument(shortName="sites", fullName = "printInterestingSites", required=false)
protected boolean printSites = false;
@Output
PrintStream out;
@ -244,7 +251,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
evalSamples = evalHeader.getGenotypeSamples();
VCFHeader compHeader = headerMap.get(compBinding.getName());
compSamples = compHeader.getGenotypeSamples();
return new ConcordanceMetrics(evalHeader,compHeader);
return new ConcordanceMetrics(evalHeader,compHeader, printSites);
}
@ -347,8 +354,10 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
}
public ConcordanceMetrics reduce(List<Pair<VariantContext,VariantContext>> evalCompList, ConcordanceMetrics metrics) {
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList)
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList){
metrics.update(evalComp.getFirst(),evalComp.getSecond());
}
return metrics;
}

View File

@ -58,10 +58,10 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true)
@Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=false, exclusiveOf = "splitToManyFiles")
protected VariantContextWriter vcfWriter1 = null;
@Output(fullName="out2", shortName="o2", doc="File #2 to which variants should be written", required=true)
@Output(fullName="out2", shortName="o2", doc="File #2 to which variants should be written", required=false, exclusiveOf = "splitToManyFiles")
// there's a reported bug in the GATK where we can't have 2 @Output writers
protected File file2 = null;
protected VariantContextWriter vcfWriter2 = null;
@ -69,6 +69,17 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
protected double fraction = 0.5;
@Argument(fullName="splitToManyFiles", shortName = "splitToMany", doc="split (with uniform distribution) to more than 2 files. numOfFiles and baseOutputName parameters are required", required = false)
protected boolean splitToMany = false;
@Argument(fullName = "numOfOutputVCFFiles", shortName = "N", doc = "number of output VCF files. Only works with SplitToMany = true", required = false, maxRecommendedValue = 20, minValue = 2)
protected int numOfFiles = -1;
@Argument(fullName = "prefixForAllOutputFileNames", shortName = "baseOutputName", doc = "the name of the output VCF file will be: <baseOutputName>.split.<number>.vcf. Required with SplitToMany option", required = false)
protected String baseFileName = null;
private VariantContextWriter[] writers = null;
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
@ -76,15 +87,37 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
if ( fraction < 0.0 || fraction > 1.0 )
throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1");
if (splitToMany){
if (numOfFiles < 2)
throw new UserException.BadArgumentValue("numOfFiles", "this value must be greater than 2 when using the splitToMany option");
if (baseFileName == null)
throw new UserException.BadArgumentValue("baseFileName", "this value cannot be null (unprovided) when using the splitToMany option");
}
else{
if(vcfWriter1 == null || vcfWriter2 == null)
throw new UserException.BadArgumentValue("out1 or out2", "this value cannot be null (unprovided) unless you are using the splitToMany option");
}
// setup the header info
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
final Set<VCFHeaderLine> hInfo = new HashSet<>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
vcfWriter2 = VariantContextWriterFactory.create(file2, getMasterSequenceDictionary());
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
if(splitToMany){
writers = new VariantContextWriter[numOfFiles];
for(int i = 0; i<writers.length; i++){
writers[i] = VariantContextWriterFactory.create(new File(baseFileName+".split."+i+".vcf"), getMasterSequenceDictionary());
writers[i].writeHeader(new VCFHeader(hInfo,samples));
}
}
else {
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
vcfWriter2 = VariantContextWriterFactory.create(file2, getMasterSequenceDictionary());
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
}
}
/**
@ -95,17 +128,23 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
* @param context alignment info
* @return 1 if the record was printed to the output file, 0 if otherwise
*/
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
if ( tracker == null )
return 0;
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : vcs ) {
double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
if ( random < fraction )
vcfWriter1.add(vc);
else
vcfWriter2.add(vc);
final Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( final VariantContext vc : vcs ) {
final double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
if(splitToMany){
final int index = (int)(numOfFiles * random);
writers[index].add(vc);
}
else{
if ( random < fraction )
vcfWriter1.add(vc);
else
vcfWriter2.add(vc);
}
}
return 1;
@ -113,10 +152,14 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
public Integer reduceInit() { return 0; }
public Integer reduce(Integer value, Integer sum) { return value + sum; }
public Integer reduce(final Integer value, final Integer sum) { return value + sum; }
public void onTraversalDone(Integer result) {
public void onTraversalDone(final Integer result) {
logger.info(result + " records processed.");
vcfWriter2.close();
if(splitToMany)
for(final VariantContextWriter writer: writers)
writer.close();
else
vcfWriter2.close();
}
}

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -9,7 +9,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-root</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../sting-root</relativePath>
</parent>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-root</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>sting-root</relativePath>
</parent>
@ -19,7 +19,7 @@
<module>sting-utils</module>
<module>gatk-framework</module>
<module>gatk-package</module>
<module>external-example</module>
<!-- <module>external-example</module> -->
<!-- queue optionally enabled as profiles -->
</modules>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>

View File

@ -12,7 +12,7 @@
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-root</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<packaging>pom</packaging>
<name>Sting Root</name>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>sting-aggregator</artifactId>
<version>2.8-SNAPSHOT</version>
<version>3.1-SNAPSHOT</version>
<relativePath>../..</relativePath>
</parent>