Updated the CallsetConcordance classes to use new VCF Variation code... and uncovered a whole bunch of VCF bugs in the process. I'm not convinced that I got them all, so I'll unit test like crazy when the refactoring is done.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2272 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b6f8e33f4c
commit
c0528cd88e
|
|
@ -259,6 +259,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
return mCurrentRecord.getGenotypes();
|
return mCurrentRecord.getGenotypes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the genotypes
|
||||||
|
*
|
||||||
|
* @return a list of the genotypes
|
||||||
|
*/
|
||||||
|
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
||||||
|
assertNotNull();
|
||||||
|
return mCurrentRecord.getVCFGenotypeRecords();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* do we have the specified genotype? not all backedByGenotypes
|
* do we have the specified genotype? not all backedByGenotypes
|
||||||
* have all the genotype data.
|
* have all the genotype data.
|
||||||
|
|
|
||||||
|
|
@ -130,19 +130,15 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
// pull out all of the individual calls from the rods and insert into a map based on the
|
// pull out all of the individual calls from the rods and insert into a map based on the
|
||||||
// mapping from rod/sample to uniquified name
|
// mapping from rod/sample to uniquified name
|
||||||
HashMap<String, VCFGenotypeCall> samplesToRecords = new HashMap<String, VCFGenotypeCall>();
|
HashMap<String, Genotype> samplesToRecords = new HashMap<String, Genotype>();
|
||||||
for ( RodVCF rod : vcfRods ) {
|
for ( RodVCF rod : vcfRods ) {
|
||||||
List<Genotype> records = rod.getGenotypes();
|
List<VCFGenotypeRecord> records = rod.getVCFGenotypeRecords();
|
||||||
for ( Genotype g : records ) {
|
for ( VCFGenotypeRecord vcfRec : records ) {
|
||||||
if ( !(g instanceof VCFGenotypeCall) )
|
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfRec.getSampleName()));
|
||||||
throw new StingException("Expected VCF rod Genotypes to be of type VCFGenotypeCall");
|
|
||||||
|
|
||||||
VCFGenotypeCall vcfCall = (VCFGenotypeCall)g;
|
|
||||||
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfCall.getSampleName()));
|
|
||||||
if ( uniquifiedSample == null )
|
if ( uniquifiedSample == null )
|
||||||
throw new StingException("Unexpected sample encountered: " + vcfCall.getSampleName() + " in rod " + rod.getName());
|
throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + rod.getName());
|
||||||
|
|
||||||
samplesToRecords.put(uniquifiedSample, vcfCall);
|
samplesToRecords.put(uniquifiedSample, vcfRec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -173,4 +169,4 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
||||||
vcfWriter.close();
|
vcfWriter.close();
|
||||||
out.printf("Processed %d loci.\n", result);
|
out.printf("Processed %d loci.\n", result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
@ -9,6 +9,6 @@ import java.util.Set;
|
||||||
public interface ConcordanceType {
|
public interface ConcordanceType {
|
||||||
|
|
||||||
public void initialize(Map<String,String> args, Set<String> samples);
|
public void initialize(Map<String,String> args, Set<String> samples);
|
||||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref);
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref);
|
||||||
public String getInfoName();
|
public String getInfoName();
|
||||||
}
|
}
|
||||||
|
|
@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -53,10 +53,10 @@ public class IndelSubsets implements ConcordanceType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||||
|
|
||||||
VCFGenotypeCall indel1 = samplesToRecords.get(sample1);
|
Genotype indel1 = samplesToRecords.get(sample1);
|
||||||
VCFGenotypeCall indel2 = samplesToRecords.get(sample2);
|
Genotype indel2 = samplesToRecords.get(sample2);
|
||||||
|
|
||||||
int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 );
|
int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 );
|
||||||
int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 );
|
int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 );
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
@ -18,12 +18,12 @@ public class NWayVenn implements ConcordanceType {
|
||||||
|
|
||||||
public void initialize(Map<String, String> args, Set<String> samples) { }
|
public void initialize(Map<String, String> args, Set<String> samples) { }
|
||||||
|
|
||||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||||
if ( samplesToRecords.size() == 0 )
|
if ( samplesToRecords.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
TreeSet<String> concordantSamples = new TreeSet<String>();
|
TreeSet<String> concordantSamples = new TreeSet<String>();
|
||||||
for ( Entry<String, VCFGenotypeCall> entry : samplesToRecords.entrySet() ) {
|
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
|
||||||
concordantSamples.add(entry.getKey());
|
concordantSamples.add(entry.getKey());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,4 +39,4 @@ public class NWayVenn implements ConcordanceType {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getInfoName() { return "NwayVenn"; }
|
public String getInfoName() { return "NwayVenn"; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -29,10 +29,10 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
||||||
sample2 = iter.next();
|
sample2 = iter.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||||
|
|
||||||
VCFGenotypeCall call1 = samplesToRecords.get(sample1);
|
Genotype call1 = samplesToRecords.get(sample1);
|
||||||
VCFGenotypeCall call2 = samplesToRecords.get(sample2);
|
Genotype call2 = samplesToRecords.get(sample2);
|
||||||
|
|
||||||
// the only reason they would be null is a lack of coverage
|
// the only reason they would be null is a lack of coverage
|
||||||
if ( call1 == null || call2 == null ) {
|
if ( call1 == null || call2 == null ) {
|
||||||
|
|
@ -78,9 +78,9 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
||||||
}
|
}
|
||||||
|
|
||||||
// one is variant and the other is ref
|
// one is variant and the other is ref
|
||||||
else if ( call1.isPointGenotype() && call2.isVariant() && confidence1 >= Qscore )
|
else if ( call1.isPointGenotype() && call2.isVariant(ref.getBase()) && confidence1 >= Qscore )
|
||||||
return "set1VariantSet2Ref";
|
return "set1VariantSet2Ref";
|
||||||
else if ( call2.isPointGenotype() && call1.isVariant() && confidence2 >= Qscore )
|
else if ( call2.isPointGenotype() && call1.isVariant(ref.getBase()) && confidence2 >= Qscore )
|
||||||
return "set1RefSet2Variant";
|
return "set1RefSet2Variant";
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -25,10 +25,10 @@ public class SimpleVenn implements ConcordanceType {
|
||||||
sample2 = iter.next();
|
sample2 = iter.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String computeConcordance(Map<String, VCFGenotypeCall> samplesToRecords, ReferenceContext ref) {
|
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||||
|
|
||||||
VCFGenotypeCall call1 = samplesToRecords.get(sample1);
|
Genotype call1 = samplesToRecords.get(sample1);
|
||||||
VCFGenotypeCall call2 = samplesToRecords.get(sample2);
|
Genotype call2 = samplesToRecords.get(sample2);
|
||||||
|
|
||||||
if ( call1 == null && call2 == null )
|
if ( call1 == null && call2 == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@ public class VCFGenotypeEncoding {
|
||||||
public VCFGenotypeEncoding(String baseString) {
|
public VCFGenotypeEncoding(String baseString) {
|
||||||
if ((baseString.length() == 1)) {
|
if ((baseString.length() == 1)) {
|
||||||
// are we an empty (no-call) genotype?
|
// are we an empty (no-call) genotype?
|
||||||
if (baseString.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) {
|
if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
|
||||||
mBases = VCFGenotypeRecord.EMPTY_GENOTYPE;
|
mBases = VCFGenotypeRecord.EMPTY_ALLELE;
|
||||||
mLength = 0;
|
mLength = 0;
|
||||||
mType = TYPE.UNCALLED;
|
mType = TYPE.UNCALLED;
|
||||||
} else if (!validBases(baseString)) {
|
} else if (!validBases(baseString)) {
|
||||||
|
|
@ -67,10 +67,14 @@ public class VCFGenotypeEncoding {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (obj != null && (obj.getClass().equals(this.getClass()))) {
|
if ( obj == null )
|
||||||
|
return false;
|
||||||
|
if ( obj instanceof VCFGenotypeEncoding ) {
|
||||||
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
|
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
|
||||||
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
|
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
|
||||||
}
|
}
|
||||||
|
if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) )
|
||||||
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -84,7 +88,7 @@ public class VCFGenotypeEncoding {
|
||||||
/**
|
/**
|
||||||
* dump the string representation of this genotype encoding
|
* dump the string representation of this genotype encoding
|
||||||
*
|
*
|
||||||
* @return
|
* @return string representation
|
||||||
*/
|
*/
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,11 @@ import java.util.Map;
|
||||||
* <p/>
|
* <p/>
|
||||||
*/
|
*/
|
||||||
public class VCFGenotypeRecord implements Genotype {
|
public class VCFGenotypeRecord implements Genotype {
|
||||||
// the symbol for a empty genotype
|
// the symbols for an empty genotype
|
||||||
public static final String EMPTY_GENOTYPE = ".";
|
public static final String EMPTY_GENOTYPE = "./.";
|
||||||
|
public static final String EMPTY_ALLELE = ".";
|
||||||
|
|
||||||
|
public static final int MISSING_DEPTH = -1;
|
||||||
|
|
||||||
// what kind of phasing this genotype has
|
// what kind of phasing this genotype has
|
||||||
public enum PHASE {
|
public enum PHASE {
|
||||||
|
|
@ -37,7 +40,7 @@ public class VCFGenotypeRecord implements Genotype {
|
||||||
private final Map<String, String> mFields = new HashMap<String, String>();
|
private final Map<String, String> mFields = new HashMap<String, String>();
|
||||||
|
|
||||||
// our sample name
|
// our sample name
|
||||||
private final String mSampleName;
|
private String mSampleName;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a VCF genotype record
|
* Create a VCF genotype record
|
||||||
|
|
@ -58,6 +61,10 @@ public class VCFGenotypeRecord implements Genotype {
|
||||||
this.mRecord = record;
|
this.mRecord = record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setSampleName(String name) {
|
||||||
|
mSampleName = name;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* determine the phase of the genotype
|
* determine the phase of the genotype
|
||||||
*
|
*
|
||||||
|
|
@ -98,6 +105,15 @@ public class VCFGenotypeRecord implements Genotype {
|
||||||
return ( mFields.containsKey("GQ") ? Double.valueOf(mFields.get("GQ")) / 10.0 : 0.0);
|
return ( mFields.containsKey("GQ") ? Double.valueOf(mFields.get("GQ")) / 10.0 : 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getReadCount() {
|
||||||
|
int depth = MISSING_DEPTH;
|
||||||
|
if ( mFields.containsKey("RD") )
|
||||||
|
depth = Integer.valueOf(mFields.get("RD"));
|
||||||
|
else if ( mFields.containsKey("DP") )
|
||||||
|
depth = Integer.valueOf(mFields.get("DP"));
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
return mRecord != null ? mRecord.getLocation() : null;
|
return mRecord != null ? mRecord.getLocation() : null;
|
||||||
}
|
}
|
||||||
|
|
@ -157,7 +173,7 @@ public class VCFGenotypeRecord implements Genotype {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
||||||
if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
|
if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
|
||||||
str += VCFGenotypeRecord.EMPTY_GENOTYPE;
|
str += VCFGenotypeRecord.EMPTY_ALLELE;
|
||||||
else
|
else
|
||||||
str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
|
str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
|
||||||
if (first) {
|
if (first) {
|
||||||
|
|
|
||||||
|
|
@ -213,8 +213,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
||||||
|
|
||||||
|
|
||||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||||
alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE));
|
alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE));
|
||||||
alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE));
|
alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE));
|
||||||
|
|
||||||
VCFGenotypeRecord record = new VCFGenotypeRecord(sampleName,
|
VCFGenotypeRecord record = new VCFGenotypeRecord(sampleName,
|
||||||
alleles,
|
alleles,
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,9 @@ class VCFParameters {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||||
if (!alternateBases.contains(base) && !base.toString().equals(String.valueOf(this.getReferenceBase())))
|
if ( !alternateBases.contains(base) &&
|
||||||
|
!base.toString().equals(String.valueOf(getReferenceBase())) &&
|
||||||
|
!base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) )
|
||||||
alternateBases.add(base);
|
alternateBases.add(base);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -206,15 +206,20 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
// if we have genotyping data, we try and extract the genotype fields
|
// if we have genotyping data, we try and extract the genotype fields
|
||||||
if (mHeader.hasGenotypingData()) {
|
if (mHeader.hasGenotypingData()) {
|
||||||
String mFormatString = tokens[index];
|
String mFormatString = tokens[index];
|
||||||
|
String keyStrings[] = mFormatString.split(":");
|
||||||
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||||
index++;
|
index++;
|
||||||
String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
|
String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
|
||||||
for (String str : mHeader.getGenotypeSamples()) {
|
for (String str : mHeader.getGenotypeSamples()) {
|
||||||
if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE))
|
genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
||||||
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
return new VCFRecord(values, mFormatString, genotypeRecords);
|
VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
|
||||||
|
// associate the genotypes with this new record
|
||||||
|
for ( VCFGenotypeRecord gr : genotypeRecords )
|
||||||
|
gr.setVCFRecord(vrec);
|
||||||
|
return vrec;
|
||||||
|
|
||||||
}
|
}
|
||||||
return new VCFRecord(values);
|
return new VCFRecord(values);
|
||||||
}
|
}
|
||||||
|
|
@ -231,11 +236,25 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
* @return a VCFGenotypeRecord
|
* @return a VCFGenotypeRecord
|
||||||
*/
|
*/
|
||||||
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
|
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
|
||||||
|
return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* generate a VCF genotype record, given it's format string, the genotype string, and allele info
|
||||||
|
*
|
||||||
|
* @param sampleName the sample name
|
||||||
|
* @param keyStrings the split format string for this record, which contains the keys for the genotype parameters
|
||||||
|
* @param genotypeString contains the phasing information, allele information, and values for genotype parameters
|
||||||
|
* @param altAlleles the alternate allele string array, which we index into based on the field parameters
|
||||||
|
* @param referenceBase the reference base
|
||||||
|
*
|
||||||
|
* @return a VCFGenotypeRecord
|
||||||
|
*/
|
||||||
|
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {
|
||||||
// parameters to create the VCF genotype record
|
// parameters to create the VCF genotype record
|
||||||
Map<String, String> tagToValue = new HashMap<String, String>();
|
Map<String, String> tagToValue = new HashMap<String, String>();
|
||||||
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
||||||
List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();
|
List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();
|
||||||
String keyStrings[] = formatString.split(":");
|
|
||||||
|
|
||||||
for (String key : keyStrings) {
|
for (String key : keyStrings) {
|
||||||
String parse;
|
String parse;
|
||||||
|
|
@ -260,11 +279,13 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
|
if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
|
||||||
genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
|
genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
|
||||||
}
|
}
|
||||||
|
if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) )
|
||||||
|
tagToValue.clear();
|
||||||
// catch some common errors, either there are too many field keys or there are two many field values
|
// catch some common errors, either there are too many field keys or there are two many field values
|
||||||
if (keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
|
else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
|
||||||
throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected "
|
throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected "
|
||||||
+ keyStrings.length + " but saw " + tagToValue.size() + ")");
|
+ keyStrings.length + " but saw " + tagToValue.size() + ")");
|
||||||
else if (genotypeString.length() > 0)
|
else if ( genotypeString.length() > 0 )
|
||||||
throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString
|
throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString
|
||||||
+ ". This most likely means that the format string is shorter then the value fields.");
|
+ ". This most likely means that the format string is shorter then the value fields.");
|
||||||
return new VCFGenotypeRecord(sampleName, bases, phase, tagToValue);
|
return new VCFGenotypeRecord(sampleName, bases, phase, tagToValue);
|
||||||
|
|
@ -280,8 +301,8 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
* @param bases the list of bases for this genotype call
|
* @param bases the list of bases for this genotype call
|
||||||
*/
|
*/
|
||||||
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
|
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
|
||||||
if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) {
|
if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
|
||||||
bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE));
|
bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE));
|
||||||
} else {
|
} else {
|
||||||
int alleleValue = Integer.valueOf(alleleNumber);
|
int alleleValue = Integer.valueOf(alleleNumber);
|
||||||
// check to make sure the allele value is within bounds
|
// check to make sure the allele value is within bounds
|
||||||
|
|
|
||||||
|
|
@ -85,11 +85,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
||||||
this.setQual(qual);
|
this.setQual(qual);
|
||||||
this.setFilterString(filters);
|
this.setFilterString(filters);
|
||||||
this.mInfoFields.putAll(infoFields);
|
this.mInfoFields.putAll(infoFields);
|
||||||
mGenotypeFormatString = genotypeFormatString;
|
this.mGenotypeFormatString = genotypeFormatString;
|
||||||
|
|
||||||
// associate the genotypes with this Variation, then add them
|
|
||||||
for ( VCFGenotypeRecord rec : genotypeObjects )
|
|
||||||
rec.setVCFRecord(this);
|
|
||||||
this.mGenotypeFields.addAll(genotypeObjects);
|
this.mGenotypeFields.addAll(genotypeObjects);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
@ -116,17 +115,13 @@ public class VCFUtils {
|
||||||
int freqsSeen = 0;
|
int freqsSeen = 0;
|
||||||
|
|
||||||
for ( RodVCF rod : rods ) {
|
for ( RodVCF rod : rods ) {
|
||||||
List<Genotype> myGenotypes = rod.getGenotypes();
|
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
||||||
for ( Genotype g : myGenotypes ) {
|
for ( VCFGenotypeRecord call : myGenotypes ) {
|
||||||
if ( !(g instanceof VCFGenotypeCall) )
|
|
||||||
throw new StingException("Expected VCFGenotypeCall object but instead saw " + g.getClass().getSimpleName());
|
|
||||||
|
|
||||||
// set the name to be the new uniquified name and add it to the list of genotypes
|
// set the name to be the new uniquified name and add it to the list of genotypes
|
||||||
VCFGenotypeCall call = (VCFGenotypeCall)g;
|
|
||||||
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), call.getSampleName())));
|
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), call.getSampleName())));
|
||||||
if ( params.getPosition() < 1 )
|
if ( params.getPosition() < 1 )
|
||||||
params.setLocations(call.getLocation(), call.getReference());
|
params.setLocations(rod.getLocation(), call.getReference());
|
||||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call));
|
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
|
||||||
totalReadDepth += call.getReadCount();
|
totalReadDepth += call.getReadCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -168,6 +163,39 @@ public class VCFUtils {
|
||||||
params.getGenotypesRecords());
|
params.getGenotypesRecords());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create the VCF genotype record
|
||||||
|
*
|
||||||
|
* @param params the VCF parameters object
|
||||||
|
* @param gtype the genotype
|
||||||
|
* @param vcfrecord the VCF record
|
||||||
|
*
|
||||||
|
* @return a VCFGenotypeRecord
|
||||||
|
*/
|
||||||
|
public static VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeRecord gtype, VCFRecord vcfrecord) {
|
||||||
|
Map<String, String> map = new HashMap<String, String>();
|
||||||
|
|
||||||
|
// calculate the RMS mapping qualities and the read depth
|
||||||
|
int readDepth = gtype.getReadCount();
|
||||||
|
map.put("RD", String.valueOf(readDepth));
|
||||||
|
params.addFormatItem("RD");
|
||||||
|
double qual = 10.0 * gtype.getNegLog10PError();
|
||||||
|
map.put("GQ", String.format("%.2f", qual));
|
||||||
|
params.addFormatItem("GQ");
|
||||||
|
|
||||||
|
List<VCFGenotypeEncoding> alleles = createAlleleArray(gtype);
|
||||||
|
for (VCFGenotypeEncoding allele : alleles) {
|
||||||
|
params.addAlternateBase(allele);
|
||||||
|
}
|
||||||
|
|
||||||
|
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(),
|
||||||
|
alleles,
|
||||||
|
VCFGenotypeRecord.PHASE.UNPHASED,
|
||||||
|
map);
|
||||||
|
record.setVCFRecord(vcfrecord);
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create the VCF genotype record
|
* create the VCF genotype record
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
|
||||||
+ " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -A SecondBaseSkew"
|
+ " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -A SecondBaseSkew"
|
||||||
+ " -sample variant -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pileup_test_chr15.vcf"
|
+ " -sample variant -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pileup_test_chr15.vcf"
|
||||||
+ " -vcf %s -L chr15:46347148";
|
+ " -vcf %s -L chr15:46347148";
|
||||||
String expected_md5 = "fe7f6d2b48ecf2c1340b5de98a9d5614";
|
String expected_md5 = "160a8e3826eb745bcfe2f463f73e1ec7";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(test_args,1,Arrays.asList(expected_md5));
|
WalkerTestSpec spec = new WalkerTestSpec(test_args,1,Arrays.asList(expected_md5));
|
||||||
executeTest("Testing on locus with many indels", spec);
|
executeTest("Testing on locus with many indels", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
public void testSimpleVenn() {
|
public void testSimpleVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1,
|
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1,
|
||||||
Arrays.asList("2fc12b6f02f4cb589f2fd134e765d6b7"));
|
Arrays.asList("851b68004874f3a2e76d795e7401f8a0"));
|
||||||
executeTest("testSimpleVenn", spec);
|
executeTest("testSimpleVenn", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
public void testSNPConcordance() {
|
public void testSNPConcordance() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||||
Arrays.asList("142bcfcc6bb404cd4bd1a4624fa9a15e"));
|
Arrays.asList("7afb56b30257fe2d66bee7a029d75685"));
|
||||||
executeTest("testSNPConcordance", spec);
|
executeTest("testSNPConcordance", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
public void testNWayVenn() {
|
public void testNWayVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
|
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
|
||||||
Arrays.asList("9a5910137b6b9745f6e0c3ee711a6bfa"));
|
Arrays.asList("f452c04c600ad10c054f18b0c77b53d5"));
|
||||||
executeTest("testNWayVenn", spec);
|
executeTest("testNWayVenn", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue