Add ability to output to a file discordant loci and their respective genotypes for each sample

This commit is contained in:
Laura Gauthier 2014-05-02 14:50:17 -04:00
parent f03a12263a
commit bf7b97393e
4 changed files with 34 additions and 32 deletions

View File

@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
codec = new VCFCodec();
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
metrics.update(eval,truth);
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
VariantContext eval = contextPair.getFirst();
@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
// set up the table
table[0] = new int[] {30, 12, 7, 5, 6, 0};
@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false);
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false);
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,null);
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,null);
// test what happens if you put in disjoint sets and start making requests
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
List<Pair<VariantContext,VariantContext>> data = getData7();

View File

@ -60,11 +60,11 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
}
@Test
public void testIndelConcordance() {
public void testIndelConcordanceWithSiteOutput() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
0,
Arrays.asList("e4368146ffed2c6abf8265f5fbc5875d")
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf") + " -sites %s",
2,
Arrays.asList("e4368146ffed2c6abf8265f5fbc5875d","1f441f00dd4243982502722c981a1e51")
);
executeTest("test indel concordance", spec);

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
import java.io.PrintStream;
import java.util.*;
/**
@ -39,12 +40,12 @@ import java.util.*;
* */
public class ConcordanceMetrics {
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
private GenotypeConcordanceTable overallGenotypeConcordance;
private SiteConcordanceTable overallSiteConcordance;
private boolean printInterestingSites;
final private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
final private GenotypeConcordanceTable overallGenotypeConcordance;
final private SiteConcordanceTable overallSiteConcordance;
final PrintStream sitesFile;
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) {
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, PrintStream inputSitesFile) {
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
overlappingSamples.retainAll(truth.getGenotypeSamples());
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
@ -53,7 +54,12 @@ public class ConcordanceMetrics {
}
overallGenotypeConcordance = new GenotypeConcordanceTable();
overallSiteConcordance = new SiteConcordanceTable();
printInterestingSites = printSitesEnabled;
sitesFile = inputSitesFile;
if (sitesFile != null) printSitesFileHeader();
}
private void printSitesFileHeader() {
sitesFile.println("Locus\tSample\tTruth Genotype\tEval Genotype");
}
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
@ -134,11 +140,8 @@ public class ConcordanceMetrics {
}
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
if(printInterestingSites && doPrint)
System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType());
//Below is code to print out mismatched alternate alleles
//System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles());
if(sitesFile != null && doPrint)
sitesFile.println(eval.getChr() + ":" + eval.getStart() + "\t" + sample + "\t" + truthGenotype.getType() + "\t" + evalGenotype.getType());
}
}

View File

@ -246,13 +246,12 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
/**
* Print sites where genotypes are mismatched between callsets along with annotations giving the genotype of each callset
* Outputs directly to System.out. Super classy.
* to the given filename
*
* NOTE: doesn't currently differentiate between samples, so there may be repeats
*/
@Hidden
@Argument(shortName="sites", fullName = "printInterestingSites", required=false)
protected boolean printSites = false;
@Argument(shortName = "sites",required = false,fullName = "printInterestingSites", doc="File to output the discordant sites and genotypes.")
private PrintStream sitesFile = null;
@Output
PrintStream out;
@ -285,7 +284,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
evalSamples = evalHeader.getGenotypeSamples();
VCFHeader compHeader = headerMap.get(compBinding.getName());
compSamples = compHeader.getGenotypeSamples();
return new ConcordanceMetrics(evalHeader,compHeader, printSites);
return new ConcordanceMetrics(evalHeader,compHeader, sitesFile);
}