Completely refactored the Callset Concordance code. Now, it takes in VCF rods and emits a single VCF file which has merged calls from all inputs and is annotated (in the INFO fields) with the appropriate concordance test(s).

Still needs a bit of polish...


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1999 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-09 05:03:13 +00:00
parent bc6f24e88f
commit ab705565cf
6 changed files with 201 additions and 250 deletions

View File

@ -4,28 +4,38 @@ import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.File;
import java.util.*;
/**
* CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available).
* CallsetConcordanceWalker finds the concordance between multiple VCF callsets (different tests are available).
*/
@Requires(value={DataSource.REFERENCE},
referenceMetaData={@RMD(name="callset1",type=VariationRod.class),
@RMD(name="callset2",type=VariationRod.class)})
@Requires(value={DataSource.REFERENCE})
@Reference(window=@Window(start=-20,stop=20))
public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true)
private String OUTPUT_PATH = null;
public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="concordance_output", shortName="CO", doc="VCF file to which output should be written", required=true)
private File OUTPUT = null;
@Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false)
private String[] TYPES = null;
@Argument(fullName="list", shortName="ls", doc="List the available concordance types and exit", required=false)
private Boolean LIST_ONLY = false;
// the concordance tests to run
private ArrayList<ConcordanceType> requestedTypes;
// VCF writer for the output of the concordance tests
private VCFWriter vcfWriter;
// a map of rod name to uniquified sample name
HashMap<Pair<String, String>, String> rodNamesToSampleNames = new HashMap<Pair<String, String>, String>();
/**
* Prepare the output file and the list of available features.
*/
@ -35,7 +45,7 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
List<Class<? extends ConcordanceType>> classes = PackageUtils.getClassesImplementingInterface(ConcordanceType.class);
// print and exit if that's what was requested
if (LIST_ONLY) {
if ( LIST_ONLY ) {
out.println("\nAvailable concordance types:");
for (int i = 0; i < classes.size(); i++)
out.println("\t" + classes.get(i).getSimpleName());
@ -43,9 +53,26 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
System.exit(0);
}
requestedTypes = new ArrayList<ConcordanceType>();
// get the list of all sample names from the various input rods (the need to be uniquified in case there's overlap)
HashSet<String> samples = new HashSet<String>();
VCFUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, rodNamesToSampleNames);
for ( java.util.Map.Entry<Pair<String, String>, String> entry : rodNamesToSampleNames.entrySet() ) {
logger.debug("Uniquified sample mapping: " + entry.getKey().first + "/" + entry.getKey().second + " -> " + entry.getValue());
}
// set up the header fields
Map<String, String> hInfo = new HashMap<String, String>();
hInfo.put("format", VCFWriter.VERSION);
hInfo.put("source", "CallsetConcordance");
hInfo.put("reference", getToolkit().getArguments().referenceFile.getName());
hInfo.put("explanation", "This file represents a concordance test of various call sets - NOT the output from a multi-sample caller");
VCFHeader header = new VCFHeader(hInfo, samples);
vcfWriter = new VCFWriter(header, OUTPUT);
// initialize requested concordance types
requestedTypes = new ArrayList<ConcordanceType>();
if (TYPES != null) {
for ( String requestedTypeString : TYPES ) {
String[] requestedPieces = requestedTypeString.split(":");
@ -68,7 +95,7 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
}
}
concordance.initialize(OUTPUT_PATH, requestedArgs);
concordance.initialize(requestedArgs, samples);
requestedTypes.add(concordance);
break;
} catch (InstantiationException e) {
@ -83,25 +110,67 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
throw new StingException("The requested concordance type (" + requestedType + ") isn't a valid concordance option");
}
}
}
}
public Integer reduceInit() { return 0; }
public Integer map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) {
if ( rodData == null ) // RodWalkers can make funky map calls
return 0;
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
for ( ConcordanceType type : requestedTypes )
type.computeConcordance(tracker, ref);
// get all of the vcf rods at this locus
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
Iterator<ReferenceOrderedDatum> rods = rodData.getAllRods().iterator();
while (rods.hasNext()) {
ReferenceOrderedDatum rod = rods.next();
if ( rod instanceof RodVCF )
vcfRods.add((RodVCF)rod);
}
if ( vcfRods.size() == 0 )
return 0;
// create a merged record from all input VCFs
VCFRecord record = VCFUtils.mergeRecords(vcfRods, rodNamesToSampleNames);
// pull out all of the individual calls from the rods and insert into a map based on the
// mapping from rod/sample to uniquified name
HashMap<String, VCFGenotypeCall> samplesToRecords = new HashMap<String, VCFGenotypeCall>();
for ( RodVCF rod : vcfRods ) {
List<Genotype> records = rod.getGenotypes();
for ( Genotype g : records ) {
if ( !(g instanceof VCFGenotypeCall) )
throw new StingException("Expected VCF rod Genotypes to be of type VCFGenotypeCall");
VCFGenotypeCall vcfCall = (VCFGenotypeCall)g;
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfCall.getSampleName()));
if ( uniquifiedSample == null )
throw new StingException("Unexpected sample encountered: " + vcfCall.getSampleName() + " in rod " + rod.getName());
samplesToRecords.put(uniquifiedSample, vcfCall);
}
}
// add in the info fields to the new record based on the results of each of the relevant concordance tests
for ( ConcordanceType type : requestedTypes ) {
String result = type.computeConcordance(samplesToRecords, ref);
if ( result != null ) {
record.addInfoField(type.getInfoName(), result);
}
}
// emit the new record
vcfWriter.addRecord(record);
return 1;
}
public Integer reduceInit() { return 0; }
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
public void onTraversalDone(Integer result) {
for ( ConcordanceType type : requestedTypes )
type.cleanup();
vcfWriter.close();
out.printf("Processed %d loci.\n", result);
}
}

View File

@ -1,14 +1,14 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
public interface ConcordanceType {
public void initialize(String prefix, HashMap<String,String> args);
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref);
public void cleanup();
public void initialize(Map<String,String> args, Set<String> samples);
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref);
public String getInfoName();
}

View File

@ -1,14 +1,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.*;
/**
* filter an indel callset based on given criteria
@ -22,54 +20,59 @@ public class IndelSubsets implements ConcordanceType {
private int homopolymerCutoff = 1;
private int sizeCutoff = 2;
private PrintWriter[][][][] writers = new PrintWriter[2][2][2][2];
private String[][][][] tags = new String[2][2][2][2];
private String sample1, sample2;
public IndelSubsets() {}
public void initialize(String prefix, HashMap<String,String> args) {
public void initialize(Map<String, String> args, Set<String> samples) {
if ( samples.size() != 2 )
throw new StingException("IndelSubsets concordance test cannot handle anything other than 2 VCF records");
if ( args.get("sizeCutoff") != null )
sizeCutoff = Integer.valueOf(args.get("sizeCutoff"));
if ( args.get("homopolymerCutoff") != null )
homopolymerCutoff = Integer.valueOf(args.get("homopolymerCutoff"));
try {
for (int i = 0; i < 2; i++) {
String name1 = i == 0 ? "set1" : "noSet1";
for (int j = 0; j < 2; j++) {
String name2 = j == 0 ? "set2" : "noSet2";
for (int k = 0; k < 2; k++) {
String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff));
for (int m = 0; m < 2; m++) {
String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff));
writers[i][j][k][m] = new PrintWriter(prefix + "." + name1 + "." + name2 + "." + name3 + "." + name4 + ".calls");
}
Iterator<String> iter = samples.iterator();
sample1 = iter.next();
sample2 = iter.next();
for (int i = 0; i < 2; i++) {
String name1 = i == 0 ? sample1 : "no_" + sample1;
for (int j = 0; j < 2; j++) {
String name2 = j == 0 ? sample2 : "no_" + sample2;
for (int k = 0; k < 2; k++) {
String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff));
for (int m = 0; m < 2; m++) {
String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff));
tags[i][j][k][m] = name1 + "." + name2 + "." + name3 + "." + name4;
}
}
}
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
Variation indel1 = (Variation)tracker.lookup("callset1", null);
Variation indel2 = (Variation)tracker.lookup("callset2", null);
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 );
int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 );
VCFGenotypeCall indel1 = samplesToRecords.get(sample1);
VCFGenotypeCall indel2 = samplesToRecords.get(sample2);
int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 );
int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 );
// skip this locus if they're both not valid indels
if ( set1 == 1 && set2 == 1 )
return;
return null;
// only deal with a valid indel
Variation indel = ( indel1 != null ? indel1 : indel2 );
Variation indel = ( indel1 != null ? indel1.toVariation() : indel2.toVariation() );
// we only deal with the first allele
int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
writers[set1][set2][size][homopol].println(indel.toString());
return tags[set1][set2][size][homopol];
}
private int homopolymerRunSize(ReferenceContext ref, Variation indel) {
@ -98,11 +101,5 @@ public class IndelSubsets implements ConcordanceType {
return Math.max(leftRun, rightRun);
}
public void cleanup() {
for (int i = 0; i < 2; i++)
for (int j = 0; j < 2; j++)
for (int k = 0; k < 2; k++)
for (int m = 0; m < 2; m++)
writers[i][j][k][m].close();
}
public String getInfoName() { return "IS"; }
}

View File

@ -1,16 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.*;
import java.util.Map.Entry;
/**
@ -21,86 +14,29 @@ import java.util.Map.Entry;
*/
public class NWayVenn implements ConcordanceType {
// TODO -- change this to use Ryan's generic map object when it's ready
private HashMap<String, PrintWriter> writers = new HashMap<String, PrintWriter>();
private PrintWriter union_writer = null;
private String prefix;
private int N;
public NWayVenn() {}
public void initialize(String prefix, HashMap<String,String> args) {
public void initialize(Map<String, String> args, Set<String> samples) { }
if ( args.get("N") == null )
throw new StingException("NWayVenn concordance module requires the 'N' argument");
N = Integer.valueOf(args.get("N"));
if ( N < 1 )
throw new StingException("NWayVenn concordance module requires that N be greater than 0");
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
if ( samplesToRecords.size() == 0 )
return null;
this.prefix = prefix;
// create the list of file names
HashMap<String, String> files = new HashMap<String, String>();
files.put("", "");
for (int i = 1; i <= N; i++) {
// create 2 new list entries for each of the current file names:
// one with and one without this set appended to the end
HashMap<String, String> appendedFiles = new HashMap<String, String>();
for ( Entry<String, String> file : files.entrySet() ) {
appendedFiles.put(file.getKey() + "0", file.getValue());
appendedFiles.put(file.getKey() + "1", file.getValue() + ".set" + i);
}
files = appendedFiles;
}
// remove the entry for no hits
files.remove(Utils.dupString('0', N));
try {
for ( Entry<String, String> file : files.entrySet() )
writers.put(file.getKey(), new PrintWriter(prefix + "." + N + "wayVenn" + file.getValue() + ".calls"));
union_writer = new PrintWriter(prefix + "." + N + "wayVenn.union.calls");
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
Variation[] calls = new Variation[N];
int goodRods = 0;
for (int i = 0; i < N; i++) {
RODRecordList<ReferenceOrderedDatum> callList = tracker.getTrackData("callset" + (i+1), null);
if ( callList != null ) {
calls[i] = (Variation)callList.getRecords().get(0);
goodRods++;
}
TreeSet<String> concordantSamples = new TreeSet<String>();
for ( Entry<String, VCFGenotypeCall> entry : samplesToRecords.entrySet() ) {
concordantSamples.add(entry.getKey());
}
if ( goodRods == 0 )
return;
// union
printVariant(union_writer, calls);
// add to the correct set
StringBuilder hashString = new StringBuilder();
for (int i = 0; i < N; i++)
hashString.append(calls[i] != null ? "1" : "0");
printVariant(writers.get(hashString.toString()), calls);
}
private static void printVariant(PrintWriter writer, Variation[] variants) {
for ( Variation variant : variants ) {
if ( variant != null ) {
writer.println(variant.toString());
return;
}
StringBuffer tag = new StringBuffer();
Iterator<String> iter = concordantSamples.iterator();
while ( iter.hasNext() ) {
tag.append(iter.next());
if ( iter.hasNext() )
tag.append(".");
}
return tag.toString();
}
public void cleanup() {
union_writer.close();
for ( PrintWriter writer : writers.values() )
writer.close();
}
public String getInfoName() { return "NV"; }
}

View File

@ -1,16 +1,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.*;
/**
* Split up two call sets into their various concordance sets
@ -19,58 +15,42 @@ public class SNPGenotypeConcordance implements ConcordanceType {
private double LOD = 5.0;
private PrintWriter sameVarWriter = null;
private PrintWriter oneVarWriter = null;
private PrintWriter sameAlleleWriter = null;
private PrintWriter diffAlleleWriter = null;
private PrintWriter refVsVar1Writer = null;
private PrintWriter refVsVar2Writer = null;
private PrintWriter comboVarWriter = null;
private PrintWriter coverageVar1Writer = null;
private PrintWriter coverageVar2Writer = null;
private String sample1, sample2;
public SNPGenotypeConcordance() {}
public void initialize(String prefix, HashMap<String,String> args) {
try {
sameVarWriter = new PrintWriter(prefix + ".snp_concordance.sameConfidentVariant.calls");
oneVarWriter = new PrintWriter(prefix + ".snp_concordance.bothVariantOnlyOneIsConfident.calls");
sameAlleleWriter = new PrintWriter(prefix + ".snp_concordance.sameVariantAlleleDifferentGenotype.calls");
diffAlleleWriter = new PrintWriter(prefix + ".snp_concordance.differentVariantAllele.calls");
refVsVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1VariantSet2Ref.calls");
refVsVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1RefSet2Variant.calls");
comboVarWriter = new PrintWriter(prefix + ".snp_concordance.confidentVariantWhenCombined.calls");
coverageVar1Writer = new PrintWriter(prefix + ".snp_concordance.set1ConfidentVariantSet2NoCoverage.calls");
coverageVar2Writer = new PrintWriter(prefix + ".snp_concordance.set1NoCoverageSet2ConfidentVariant.calls");
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
public void initialize(Map<String, String> args, Set<String> samples) {
if ( samples.size() != 2 )
throw new StingException("SNPGenotype concordance test cannot handle anything other than 2 VCF records");
if ( args.get("lod") != null )
LOD = Double.valueOf(args.get("lod"));
Iterator<String> iter = samples.iterator();
sample1 = iter.next();
sample2 = iter.next();
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
RODRecordList<ReferenceOrderedDatum> call1List = tracker.getTrackData("callset1", null);
RODRecordList<ReferenceOrderedDatum> call2List = tracker.getTrackData("callset2", null);
Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0));
Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0));
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
VCFGenotypeCall vcfCall1 = samplesToRecords.get(sample1);
VCFGenotypeCall vcfCall2 = samplesToRecords.get(sample2);
Variation call1 = (vcfCall1 == null ? null : vcfCall1.toVariation());
Variation call2 = (vcfCall2 == null ? null : vcfCall2.toVariation());
// the only reason they would be null is a lack of coverage
if ( call1 == null || call2 == null ) {
if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD )
printVariant(coverageVar1Writer, call1);
return "set1ConfidentVariantSet2NoCoverage";
else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD )
printVariant(coverageVar2Writer, call2);
return;
return "set1NoCoverageSet2ConfidentVariant";
return null;
}
if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype))
throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
throw new StingException("Both ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
double bestVsRef1 = call1.getNegLog10PError();
double bestVsRef2 = call2.getNegLog10PError();
//double bestVsNext1 = call1.getConsensusConfidence();
//double bestVsNext2 = call2.getConsensusConfidence();
String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases();
String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases();
@ -81,33 +61,35 @@ public class SNPGenotypeConcordance implements ConcordanceType {
if ( bestVsRef1 >= LOD && bestVsRef2 >= LOD ) {
// same genotype
if ( genotype1.equals(genotype2) )
printVariant(sameVarWriter, call1);
return "sameConfidentVariant";
// same allele, different genotype
else if ( sameVariantAllele(genotype1, genotype2, ref.getBase()) )
printVariant(sameAlleleWriter, call1);
return "sameVariantAlleleDifferentGenotype";
// different variant allele
else
printVariant(diffAlleleWriter, call1);
return "differentVariantAllele";
}
// confident only when combined
else if ( bestVsRef1 < LOD && bestVsRef2 < LOD && bestVsRef1 + bestVsRef2 >= LOD ) {
printVariant(comboVarWriter, call1);
return "confidentVariantWhenCombined";
}
// only one is confident variant
else if ( (bestVsRef1 < LOD && bestVsRef2 >= LOD) || (bestVsRef1 >= LOD && bestVsRef2 < LOD) ) {
printVariant(oneVarWriter, call1);
return "bothVariantOnlyOneIsConfident";
}
}
// one is variant and the other is ref
else if ( call1.isSNP() && call2.isReference() && bestVsRef1 >= LOD )
printVariant(refVsVar1Writer, call1);
return "set1VariantSet2Ref";
else if ( call2.isSNP() && call1.isReference() && bestVsRef2 >= LOD )
printVariant(refVsVar2Writer, call2);
return "set1RefSet2Variant";
return null;
}
private boolean sameVariantAllele(String genotype1, String genotype2, char ref) {
@ -118,19 +100,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
return altAllele1 == altAllele2;
}
private static void printVariant(PrintWriter writer, Variation variant) {
writer.println(variant.toString());
}
public void cleanup() {
sameVarWriter.close();
oneVarWriter.close();
sameAlleleWriter.close();
diffAlleleWriter.close();
refVsVar1Writer.close();
refVsVar2Writer.close();
comboVarWriter.close();
coverageVar1Writer.close();
coverageVar2Writer.close();
}
public String getInfoName() { return "SG"; }
}

View File

@ -1,81 +1,62 @@
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
import org.broadinstitute.sting.utils.StingException;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.*;
/**
* Split up two call sets into their various "Venn diagram" sets
*/
public class SimpleVenn implements ConcordanceType {
private PrintWriter union_writer = null;
private PrintWriter intersect_writer = null;
private PrintWriter discord_writer = null;
private PrintWriter set1_writer = null;
private PrintWriter set2_writer = null;
private String sample1, sample2;
public SimpleVenn() {}
public void initialize(String prefix, HashMap<String,String> args) {
try {
union_writer = new PrintWriter(prefix + ".venn.union.calls");
intersect_writer = new PrintWriter(prefix + ".venn.intersection.calls");
discord_writer = new PrintWriter(prefix + ".venn.discordant.calls");
set1_writer = new PrintWriter(prefix + ".venn.set1Only.calls");
set2_writer = new PrintWriter(prefix + ".venn.set2Only.calls");
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
public void initialize(Map<String, String> args, Set<String> samples) {
if ( samples.size() != 2 )
throw new StingException("SimpleVenn concordance test cannot handle anything other than 2 VCF records");
Iterator<String> iter = samples.iterator();
sample1 = iter.next();
sample2 = iter.next();
}
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
RODRecordList<ReferenceOrderedDatum> call1List = tracker.getTrackData("callset1", null);
RODRecordList<ReferenceOrderedDatum> call2List = tracker.getTrackData("callset2", null);
Variation call1 = (call1List == null ? null : (Variation)call1List.getRecords().get(0));
Variation call2 = (call2List == null ? null : (Variation)call2List.getRecords().get(0));
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
VCFGenotypeCall call1 = samplesToRecords.get(sample1);
VCFGenotypeCall call2 = samplesToRecords.get(sample2);
if ( call1 == null && call2 == null )
return;
// union
printVariant(union_writer, call1 != null ? call1 : call2);
return null;
// set 1 only
if ( call2 == null )
printVariant(set1_writer, call1);
return sample1 + "_only";
// set 2 only
else if ( call1 == null )
printVariant(set2_writer, call2);
return sample2 + "_only";
// at this point we know that neither is null, so now we need to test for alternate allele concordance
Variation callV1 = call1.toVariation();
Variation callV2 = call2.toVariation();
// we can't really deal with multi-allelic variants
else if ( call1.isBiallelic() && call2.isBiallelic() ) {
if ( callV1.isBiallelic() && callV2.isBiallelic() ) {
// intersection (concordant)
if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP() )
printVariant(intersect_writer, call1);
if ( callV1.getAlternativeBaseForSNP() == callV2.getAlternativeBaseForSNP() )
return "concordant";
// intersection (discordant)
else
printVariant(discord_writer, call1);
return "discordant";
}
return "concordant";
}
private static void printVariant(PrintWriter writer, Variation variant) {
writer.println(variant.toString());
}
public void cleanup() {
union_writer.close();
intersect_writer.close();
discord_writer.close();
set1_writer.close();
set2_writer.close();
}
public String getInfoName() { return "SV"; }
}