Add ability to output to a file discordant loci and their respective genotypes for each sample
This commit is contained in:
parent
f03a12263a
commit
bf7b97393e
|
|
@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
|
||||
|
|
@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||
|
|
@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
codec = new VCFCodec();
|
||||
evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||
|
|
@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
|
|
@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
|
|
@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
|
|
@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
|
||||
for ( Pair<VariantContext,VariantContext> contextPair : data ) {
|
||||
VariantContext eval = contextPair.getFirst();
|
||||
|
|
@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
|
||||
// set up the table
|
||||
table[0] = new int[] {30, 12, 7, 5, 6, 0};
|
||||
|
|
@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1))));
|
||||
VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2))));
|
||||
VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3))));
|
||||
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false);
|
||||
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false);
|
||||
ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,null);
|
||||
ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,null);
|
||||
|
||||
// test what happens if you put in disjoint sets and start making requests
|
||||
Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size());
|
||||
|
|
@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false);
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
|
||||
|
||||
List<Pair<VariantContext,VariantContext>> data = getData7();
|
||||
|
||||
|
|
|
|||
|
|
@ -60,11 +60,11 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testIndelConcordance() {
|
||||
public void testIndelConcordanceWithSiteOutput() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
|
||||
0,
|
||||
Arrays.asList("e4368146ffed2c6abf8265f5fbc5875d")
|
||||
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf") + " -sites %s",
|
||||
2,
|
||||
Arrays.asList("e4368146ffed2c6abf8265f5fbc5875d","1f441f00dd4243982502722c981a1e51")
|
||||
);
|
||||
|
||||
executeTest("test indel concordance", spec);
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
|||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -39,12 +40,12 @@ import java.util.*;
|
|||
* */
|
||||
public class ConcordanceMetrics {
|
||||
|
||||
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||
private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||
private SiteConcordanceTable overallSiteConcordance;
|
||||
private boolean printInterestingSites;
|
||||
final private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||
final private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||
final private SiteConcordanceTable overallSiteConcordance;
|
||||
final PrintStream sitesFile;
|
||||
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) {
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, PrintStream inputSitesFile) {
|
||||
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
||||
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
||||
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
||||
|
|
@ -53,7 +54,12 @@ public class ConcordanceMetrics {
|
|||
}
|
||||
overallGenotypeConcordance = new GenotypeConcordanceTable();
|
||||
overallSiteConcordance = new SiteConcordanceTable();
|
||||
printInterestingSites = printSitesEnabled;
|
||||
sitesFile = inputSitesFile;
|
||||
if (sitesFile != null) printSitesFileHeader();
|
||||
}
|
||||
|
||||
private void printSitesFileHeader() {
|
||||
sitesFile.println("Locus\tSample\tTruth Genotype\tEval Genotype");
|
||||
}
|
||||
|
||||
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
|
||||
|
|
@ -134,11 +140,8 @@ public class ConcordanceMetrics {
|
|||
}
|
||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
if(printInterestingSites && doPrint)
|
||||
System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType());
|
||||
|
||||
//Below is code to print out mismatched alternate alleles
|
||||
//System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles());
|
||||
if(sitesFile != null && doPrint)
|
||||
sitesFile.println(eval.getChr() + ":" + eval.getStart() + "\t" + sample + "\t" + truthGenotype.getType() + "\t" + evalGenotype.getType());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -246,13 +246,12 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
|
||||
/**
|
||||
* Print sites where genotypes are mismatched between callsets along with annotations giving the genotype of each callset
|
||||
* Outputs directly to System.out. Super classy.
|
||||
* to the given filename
|
||||
*
|
||||
* NOTE: doesn't currently differentiate between samples, so there may be repeats
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(shortName="sites", fullName = "printInterestingSites", required=false)
|
||||
protected boolean printSites = false;
|
||||
|
||||
@Argument(shortName = "sites",required = false,fullName = "printInterestingSites", doc="File to output the discordant sites and genotypes.")
|
||||
private PrintStream sitesFile = null;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
|
@ -285,7 +284,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
evalSamples = evalHeader.getGenotypeSamples();
|
||||
VCFHeader compHeader = headerMap.get(compBinding.getName());
|
||||
compSamples = compHeader.getGenotypeSamples();
|
||||
return new ConcordanceMetrics(evalHeader,compHeader, printSites);
|
||||
return new ConcordanceMetrics(evalHeader,compHeader, sitesFile);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue