Added hidden flag to GenotypeConcordance to output sites of discordant genotypes (to System.out)
Revised ConcordanceMetrics tests to adapt to the change. Added comments to PosteriorLikelihoodsUtils.
This commit is contained in:
parent
1a1fb8cf6f
commit
7f9f58dbd1
|
|
@ -67,14 +67,18 @@ public class PosteriorLikelihoodsUtils {
|
||||||
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
|
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
|
||||||
|
|
||||||
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
||||||
|
|
||||||
|
//store the allele counts for each allele in the variant priors
|
||||||
for ( final VariantContext resource : resources ) {
|
for ( final VariantContext resource : resources ) {
|
||||||
addAlleleCounts(totalAlleleCounts,resource,useAC);
|
addAlleleCounts(totalAlleleCounts,resource,useAC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//add the allele counts from the input samples (if applicable)
|
||||||
if ( useInputSamples ) {
|
if ( useInputSamples ) {
|
||||||
addAlleleCounts(totalAlleleCounts,vc1,useAC);
|
addAlleleCounts(totalAlleleCounts,vc1,useAC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//add zero allele counts for any reference alleles not seen in priors (if applicable)
|
||||||
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
|
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
|
||||||
|
|
||||||
// now extract the counts of the alleles present within vc1, and in order
|
// now extract the counts of the alleles present within vc1, and in order
|
||||||
|
|
@ -86,6 +90,7 @@ public class PosteriorLikelihoodsUtils {
|
||||||
totalAlleleCounts.get(allele) : 0 );
|
totalAlleleCounts.get(allele) : 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//parse the likelihoods for each sample's genotype
|
||||||
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
|
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
|
||||||
for ( final Genotype genotype : vc1.getGenotypes() ) {
|
for ( final Genotype genotype : vc1.getGenotypes() ) {
|
||||||
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
||||||
|
|
@ -196,13 +201,24 @@ public class PosteriorLikelihoodsUtils {
|
||||||
return priors;
|
return priors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse counts for each allele
|
||||||
|
* @param counts - Map to store and return data
|
||||||
|
* @param context - line to be parsed from the input VCF file
|
||||||
|
* @param useAC - use allele count annotation value from VariantContext (vs. MLEAC)
|
||||||
|
*/
|
||||||
private static void addAlleleCounts(final Map<Allele,Integer> counts, final VariantContext context, final boolean useAC) {
|
private static void addAlleleCounts(final Map<Allele,Integer> counts, final VariantContext context, final boolean useAC) {
|
||||||
final int[] ac;
|
final int[] ac;
|
||||||
|
//use MLEAC value...
|
||||||
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
||||||
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||||
} else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
}
|
||||||
|
//...unless specified by the user in useAC
|
||||||
|
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||||
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||||
} else {
|
}
|
||||||
|
//if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field
|
||||||
|
else {
|
||||||
ac = new int[context.getAlternateAlleles().size()];
|
ac = new int[context.getAlternateAlleles().size()];
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
for ( final Allele allele : context.getAlternateAlleles() ) {
|
for ( final Allele allele : context.getAlternateAlleles() ) {
|
||||||
|
|
@ -210,24 +226,33 @@ public class PosteriorLikelihoodsUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//since the allele count for the reference allele is not given in the VCF format,
|
||||||
|
//calculate it from the allele number minus the total counts for alternate alleles
|
||||||
for ( final Allele allele : context.getAlleles() ) {
|
for ( final Allele allele : context.getAlleles() ) {
|
||||||
final int count;
|
final int count;
|
||||||
if ( allele.isReference() ) {
|
if ( allele.isReference() ) {
|
||||||
if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
|
if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
|
||||||
count = context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac);
|
count = Math.max(context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac),0); //occasionally an MLEAC value will sneak in that's greater than the AN
|
||||||
} else {
|
} else {
|
||||||
count = context.getCalledChrCount() - (int) MathUtils.sum(ac);
|
count = Math.max(context.getCalledChrCount() - (int) MathUtils.sum(ac),0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
count = ac[context.getAlternateAlleles().indexOf(allele)];
|
count = ac[context.getAlternateAlleles().indexOf(allele)];
|
||||||
}
|
}
|
||||||
|
//if this allele isn't in the map yet, add it
|
||||||
if ( ! counts.containsKey(allele) ) {
|
if ( ! counts.containsKey(allele) ) {
|
||||||
counts.put(allele,0);
|
counts.put(allele,0);
|
||||||
}
|
}
|
||||||
|
//add the count for the current allele to the existing value in the map
|
||||||
counts.put(allele,count + counts.get(allele));
|
counts.put(allele,count + counts.get(allele));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
|
||||||
|
* @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()
|
||||||
|
* @return - array of ints
|
||||||
|
*/
|
||||||
public static int[] extractInts(final Object integerListContainingVCField) {
|
public static int[] extractInts(final Object integerListContainingVCField) {
|
||||||
List<Integer> mleList = null;
|
List<Integer> mleList = null;
|
||||||
if ( integerListContainingVCField instanceof List ) {
|
if ( integerListContainingVCField instanceof List ) {
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
|
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
|
||||||
|
|
@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||||
|
|
@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
codec = new VCFCodec();
|
codec = new VCFCodec();
|
||||||
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||||
|
|
@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
|
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
|
||||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||||
|
|
@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||||
|
|
@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
metrics.update(eval,truth);
|
metrics.update(eval,truth);
|
||||||
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
|
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
|
||||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||||
|
|
@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
|
|
||||||
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
|
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
|
||||||
VariantContext eval = contextPair.getFirst();
|
VariantContext eval = contextPair.getFirst();
|
||||||
|
|
@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
|
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
|
||||||
// set up the table
|
// set up the table
|
||||||
table[0] = new int[] {30, 12, 7, 5, 6, 0};
|
table[0] = new int[] {30, 12, 7, 5, 6, 0};
|
||||||
|
|
@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
|
||||||
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
|
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
|
||||||
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
|
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
|
||||||
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader);
|
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false);
|
||||||
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader);
|
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false);
|
||||||
|
|
||||||
// test what happens if you put in disjoint sets and start making requests
|
// test what happens if you put in disjoint sets and start making requests
|
||||||
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
|
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
|
||||||
|
|
@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||||
|
|
||||||
List<Pair<VariantContext,VariantContext>> data = getData7();
|
List<Pair<VariantContext,VariantContext>> data = getData7();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,9 @@ public class ConcordanceMetrics {
|
||||||
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||||
private GenotypeConcordanceTable overallGenotypeConcordance;
|
private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||||
private SiteConcordanceTable overallSiteConcordance;
|
private SiteConcordanceTable overallSiteConcordance;
|
||||||
|
private Boolean printInterestingSites;
|
||||||
|
|
||||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) {
|
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) {
|
||||||
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
||||||
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
||||||
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
||||||
|
|
@ -52,6 +53,7 @@ public class ConcordanceMetrics {
|
||||||
}
|
}
|
||||||
overallGenotypeConcordance = new GenotypeConcordanceTable();
|
overallGenotypeConcordance = new GenotypeConcordanceTable();
|
||||||
overallSiteConcordance = new SiteConcordanceTable();
|
overallSiteConcordance = new SiteConcordanceTable();
|
||||||
|
printInterestingSites = printSitesEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
|
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
|
||||||
|
|
@ -114,6 +116,7 @@ public class ConcordanceMetrics {
|
||||||
|
|
||||||
@Requires({"eval != null","truth != null"})
|
@Requires({"eval != null","truth != null"})
|
||||||
public void update(VariantContext eval, VariantContext truth) {
|
public void update(VariantContext eval, VariantContext truth) {
|
||||||
|
Boolean doPrint = false;
|
||||||
overallSiteConcordance.update(eval,truth);
|
overallSiteConcordance.update(eval,truth);
|
||||||
Set<String> alleleTruth = new HashSet<String>(8);
|
Set<String> alleleTruth = new HashSet<String>(8);
|
||||||
String truthRef = truth.getReference().getBaseString();
|
String truthRef = truth.getReference().getBaseString();
|
||||||
|
|
@ -130,7 +133,12 @@ public class ConcordanceMetrics {
|
||||||
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
||||||
}
|
}
|
||||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||||
|
if(printInterestingSites && doPrint)
|
||||||
|
System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType());
|
||||||
|
|
||||||
|
//Below is code to print out mismatched alternate alleles
|
||||||
|
//System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -212,13 +220,14 @@ public class ConcordanceMetrics {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"eval!=null","truth != null","truthAlleles != null"})
|
@Requires({"eval!=null","truth != null","truthAlleles != null"})
|
||||||
public void update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
|
public Boolean update(Genotype eval, Genotype truth, Set<String> truthAlleles, String truthRef) {
|
||||||
// this is slow but correct.
|
// this is slow but correct.
|
||||||
|
|
||||||
// NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles
|
// NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles
|
||||||
// that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will
|
// that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will
|
||||||
// all match, so long as A and T are alleles in the truth callset.
|
// all match, so long as A and T are alleles in the truth callset.
|
||||||
boolean matchingAlt = true;
|
boolean matchingAlt = true;
|
||||||
|
int evalGT, truthGT;
|
||||||
if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) {
|
if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) {
|
||||||
// by default, no-calls "match" between alleles, so if
|
// by default, no-calls "match" between alleles, so if
|
||||||
// one or both sites are no-call or unavailable, the alt alleles match
|
// one or both sites are no-call or unavailable, the alt alleles match
|
||||||
|
|
@ -241,10 +250,17 @@ public class ConcordanceMetrics {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( matchingAlt ) {
|
if ( matchingAlt ) {
|
||||||
genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++;
|
evalGT = eval.getType().ordinal();
|
||||||
|
truthGT = truth.getType().ordinal();
|
||||||
|
genotypeCounts[evalGT][truthGT]++;
|
||||||
|
if(evalGT != truthGT) //report variants where genotypes don't match
|
||||||
|
return true;
|
||||||
} else {
|
} else {
|
||||||
nMismatchingAlt++;
|
nMismatchingAlt++;
|
||||||
|
return false;
|
||||||
|
//return true; //alternatively, report variants where alt alleles don't match
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int[][] getTable() {
|
public int[][] getTable() {
|
||||||
|
|
|
||||||
|
|
@ -25,10 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -213,6 +210,16 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
@Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output")
|
@Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output")
|
||||||
public boolean moltenize = false;
|
public boolean moltenize = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print sites where genotypes are mismatched between callsets along with annotations giving the genotype of each callset
|
||||||
|
* Outputs directly to System.out. Super classy.
|
||||||
|
*
|
||||||
|
* NOTE: doesn't currently differentiate between samples, so there may be repeats
|
||||||
|
*/
|
||||||
|
@Hidden
|
||||||
|
@Argument(shortName="sites", fullName = "printInterestingSites", required=false)
|
||||||
|
protected boolean printSites = false;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
|
|
@ -244,7 +251,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
evalSamples = evalHeader.getGenotypeSamples();
|
evalSamples = evalHeader.getGenotypeSamples();
|
||||||
VCFHeader compHeader = headerMap.get(compBinding.getName());
|
VCFHeader compHeader = headerMap.get(compBinding.getName());
|
||||||
compSamples = compHeader.getGenotypeSamples();
|
compSamples = compHeader.getGenotypeSamples();
|
||||||
return new ConcordanceMetrics(evalHeader,compHeader);
|
return new ConcordanceMetrics(evalHeader,compHeader, printSites);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -347,8 +354,10 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
}
|
}
|
||||||
|
|
||||||
public ConcordanceMetrics reduce(List<Pair<VariantContext,VariantContext>> evalCompList, ConcordanceMetrics metrics) {
|
public ConcordanceMetrics reduce(List<Pair<VariantContext,VariantContext>> evalCompList, ConcordanceMetrics metrics) {
|
||||||
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList)
|
for ( Pair<VariantContext,VariantContext> evalComp : evalCompList){
|
||||||
metrics.update(evalComp.getFirst(),evalComp.getSecond());
|
metrics.update(evalComp.getFirst(),evalComp.getSecond());
|
||||||
|
|
||||||
|
}
|
||||||
return metrics;
|
return metrics;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue