Added hidden flag to GenotypeConcordance to output sites of discordant genotypes (to System.out)
Revised ConcondanceMetrics tests to adapt to change Added comments to PosteriorLikelihoodsUtils
This commit is contained in:
parent
1a1fb8cf6f
commit
7f9f58dbd1
|
|
@ -67,14 +67,18 @@ public class PosteriorLikelihoodsUtils {
|
|||
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
|
||||
|
||||
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
||||
|
||||
//store the allele counts for each allele in the variant priors
|
||||
for ( final VariantContext resource : resources ) {
|
||||
addAlleleCounts(totalAlleleCounts,resource,useAC);
|
||||
}
|
||||
|
||||
//add the allele counts from the input samples (if applicable)
|
||||
if ( useInputSamples ) {
|
||||
addAlleleCounts(totalAlleleCounts,vc1,useAC);
|
||||
}
|
||||
|
||||
//add zero allele counts for any reference alleles not seen in priors (if applicable)
|
||||
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
|
||||
|
||||
// now extract the counts of the alleles present within vc1, and in order
|
||||
|
|
@ -86,6 +90,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
totalAlleleCounts.get(allele) : 0 );
|
||||
}
|
||||
|
||||
//parse the likelihoods for each sample's genotype
|
||||
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
|
||||
for ( final Genotype genotype : vc1.getGenotypes() ) {
|
||||
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
||||
|
|
@ -196,13 +201,24 @@ public class PosteriorLikelihoodsUtils {
|
|||
return priors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse counts for each allele
|
||||
* @param counts - Map to store and return data
|
||||
* @param context - line to be parsed from the input VCF file
|
||||
* @param useAC - use allele count annotation value from VariantContext (vs. MLEAC)
|
||||
*/
|
||||
private static void addAlleleCounts(final Map<Allele,Integer> counts, final VariantContext context, final boolean useAC) {
|
||||
final int[] ac;
|
||||
//use MLEAC value...
|
||||
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
||||
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
} else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
}
|
||||
//...unless specified by the user in useAC
|
||||
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
} else {
|
||||
}
|
||||
//if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field
|
||||
else {
|
||||
ac = new int[context.getAlternateAlleles().size()];
|
||||
int idx = 0;
|
||||
for ( final Allele allele : context.getAlternateAlleles() ) {
|
||||
|
|
@ -210,24 +226,33 @@ public class PosteriorLikelihoodsUtils {
|
|||
}
|
||||
}
|
||||
|
||||
//since the allele count for the reference allele is not given in the VCF format,
|
||||
//calculate it from the allele number minus the total counts for alternate alleles
|
||||
for ( final Allele allele : context.getAlleles() ) {
|
||||
final int count;
|
||||
if ( allele.isReference() ) {
|
||||
if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
|
||||
count = context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac);
|
||||
count = Math.max(context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac),0); //occasionally an MLEAC value will sneak in that's greater than the AN
|
||||
} else {
|
||||
count = context.getCalledChrCount() - (int) MathUtils.sum(ac);
|
||||
count = Math.max(context.getCalledChrCount() - (int) MathUtils.sum(ac),0);
|
||||
}
|
||||
} else {
|
||||
count = ac[context.getAlternateAlleles().indexOf(allele)];
|
||||
}
|
||||
//if this allele isn't in the map yet, add it
|
||||
if ( ! counts.containsKey(allele) ) {
|
||||
counts.put(allele,0);
|
||||
}
|
||||
//add the count for the current allele to the existing value in the map
|
||||
counts.put(allele,count + counts.get(allele));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
|
||||
* @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()
|
||||
* @return - array of ints
|
||||
*/
|
||||
public static int[] extractInts(final Object integerListContainingVCField) {
|
||||
List<Integer> mleList = null;
|
||||
if ( integerListContainingVCField instanceof List ) {
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
|
||||
|
|
@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||
|
|
@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
codec = new VCFCodec();
|
||||
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||
|
|
@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
|
|
@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
|
|
@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
|
|
@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
|
||||
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
|
||||
VariantContext eval = contextPair.getFirst();
|
||||
|
|
@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
|
||||
// set up the table
|
||||
table[0] = new int[] {30, 12, 7, 5, 6, 0};
|
||||
|
|
@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
|
||||
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
|
||||
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
|
||||
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader);
|
||||
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader);
|
||||
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false);
|
||||
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false);
|
||||
|
||||
// test what happens if you put in disjoint sets and start making requests
|
||||
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
|
||||
|
|
@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
|
||||
List<Pair<VariantContext,VariantContext>> data = getData7();
|
||||
|
||||
|
|
|
|||
|
|
@ -42,8 +42,9 @@ public class ConcordanceMetrics {
|
|||
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||
private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||
private SiteConcordanceTable overallSiteConcordance;
|
||||
private Boolean printInterestingSites;
|
||||
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) {
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) {
|
||||
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
||||
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
||||
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
||||
|
|
@ -52,6 +53,7 @@ public class ConcordanceMetrics {
|
|||
}
|
||||
overallGenotypeConcordance = new GenotypeConcordanceTable();
|
||||
overallSiteConcordance = new SiteConcordanceTable();
|
||||
printInterestingSites = printSitesEnabled;
|
||||
}
|
||||
|
||||
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
|
||||
|
|
@ -114,6 +116,7 @@ public class ConcordanceMetrics {
|
|||
|
||||
@Requires({"eval != null","truth != null"})
|
||||
public void update(VariantContext eval, VariantContext truth) {
|
||||
Boolean doPrint = false;
|
||||
overallSiteConcordance.update(eval,truth);
|
||||
Set<String> alleleTruth = new HashSet<String>(8);
|
||||
String truthRef = truth.getReference().getBaseString();
|
||||
|
|
@ -130,7 +133,12 @@ public class ConcordanceMetrics {
|
|||
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
||||
}
|
||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
if(printInterestingSites && doPrint)
|
||||
System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType());
|
||||
|
||||
//Below is code to print out mismatched alternate alleles
|
||||
//System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -212,13 +220,14 @@ public class ConcordanceMetrics {
|
|||
}
|
||||
|
||||
@Requires({"eval!=null","truth != null","truthAlleles != null"})
|
||||
public void update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
|
||||
public Boolean update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
|
||||
// this is slow but correct.
|
||||
|
||||
// NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles
|
||||
// that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will
|
||||
// all match, so long as A and T are alleles in the truth callset.
|
||||
boolean matchingAlt = true;
|
||||
int evalGT, truthGT;
|
||||
if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) {
|
||||
// by default, no-calls "match" between alleles, so if
|
||||
// one or both sites are no-call or unavailable, the alt alleles match
|
||||
|
|
@ -241,10 +250,17 @@ public class ConcordanceMetrics {
|
|||
}
|
||||
|
||||
if ( matchingAlt ) {
|
||||
genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++;
|
||||
evalGT = eval.getType().ordinal();
|
||||
truthGT = truth.getType().ordinal();
|
||||
genotypeCounts[evalGT][truthGT]++;
|
||||
if(evalGT != truthGT) //report variants where genotypes don't match
|
||||
return true;
|
||||
} else {
|
||||
nMismatchingAlt++;
|
||||
return false;
|
||||
//return true; //alternatively, report variants where alt alleles don't match
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public int[][] getTable() {
|
||||
|
|
|
|||
|
|
@ -25,10 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -213,6 +210,16 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
@Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output")
|
||||
public boolean moltenize = false;
|
||||
|
||||
/**
|
||||
* Print sites where genotypes are mismatched between callsets along with annotations giving the genotype of each callset
|
||||
* Outputs directly to System.out. Super classy.
|
||||
*
|
||||
* NOTE: doesn't currently differentiate between samples, so there may be repeats
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(shortName="sites", fullName = "printInterestingSites", required=false)
|
||||
protected boolean printSites = false;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
|
|
@ -244,7 +251,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
evalSamples = evalHeader.getGenotypeSamples();
|
||||
VCFHeader compHeader = headerMap.get(compBinding.getName());
|
||||
compSamples = compHeader.getGenotypeSamples();
|
||||
return new ConcordanceMetrics(evalHeader,compHeader);
|
||||
return new ConcordanceMetrics(evalHeader,compHeader, printSites);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -347,8 +354,10 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
}
|
||||
|
||||
public ConcordanceMetrics reduce(List<Pair<VariantContext,VariantContext>> evalCompList, ConcordanceMetrics metrics) {
|
||||
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList)
|
||||
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList){
|
||||
metrics.update(evalComp.getFirst(),evalComp.getSecond());
|
||||
|
||||
}
|
||||
return metrics;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue