diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 8253a30bc..708d87d6c 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -259,6 +259,16 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, return mCurrentRecord.getGenotypes(); } + /** + * get the genotypes + * + * @return a list of the genotypes + */ + public List getVCFGenotypeRecords() { + assertNotNull(); + return mCurrentRecord.getVCFGenotypeRecords(); + } + /** * do we have the specified genotype? not all backedByGenotypes * have all the genotype data. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java index f0af463dc..2b1501f5c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -130,19 +130,15 @@ public class CallsetConcordanceWalker extends RodWalker { // pull out all of the individual calls from the rods and insert into a map based on the // mapping from rod/sample to uniquified name - HashMap samplesToRecords = new HashMap(); + HashMap samplesToRecords = new HashMap(); for ( RodVCF rod : vcfRods ) { - List records = rod.getGenotypes(); - for ( Genotype g : records ) { - if ( !(g instanceof VCFGenotypeCall) ) - throw new StingException("Expected VCF rod Genotypes to be of type VCFGenotypeCall"); - - VCFGenotypeCall vcfCall = (VCFGenotypeCall)g; - String uniquifiedSample = rodNamesToSampleNames.get(new Pair(rod.getName(), vcfCall.getSampleName())); + List records = rod.getVCFGenotypeRecords(); + for ( VCFGenotypeRecord vcfRec : records ) { + String uniquifiedSample = rodNamesToSampleNames.get(new Pair(rod.getName(), 
vcfRec.getSampleName())); if ( uniquifiedSample == null ) - throw new StingException("Unexpected sample encountered: " + vcfCall.getSampleName() + " in rod " + rod.getName()); + throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + rod.getName()); - samplesToRecords.put(uniquifiedSample, vcfCall); + samplesToRecords.put(uniquifiedSample, vcfRec); } } @@ -173,4 +169,4 @@ public class CallsetConcordanceWalker extends RodWalker { vcfWriter.close(); out.printf("Processed %d loci.\n", result); } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java index 9e7340ae9..eb8e993cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.util.Map; import java.util.Set; @@ -9,6 +9,6 @@ import java.util.Set; public interface ConcordanceType { public void initialize(Map args, Set samples); - public String computeConcordance(Map samplesToRecords, ReferenceContext ref); + public String computeConcordance(Map samplesToRecords, ReferenceContext ref); public String getInfoName(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java index 041256c29..f1daf55d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java @@ -4,7 +4,7 @@ import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.util.*; @@ -53,10 +53,10 @@ public class IndelSubsets implements ConcordanceType { } } - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - VCFGenotypeCall indel1 = samplesToRecords.get(sample1); - VCFGenotypeCall indel2 = samplesToRecords.get(sample2); + Genotype indel1 = samplesToRecords.get(sample1); + Genotype indel2 = samplesToRecords.get(sample2); int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 ); int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java index 778a9a952..bd90f47f8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.util.*; import java.util.Map.Entry; @@ -18,12 +18,12 @@ public class NWayVenn implements ConcordanceType { public void initialize(Map args, Set samples) { } - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { if ( samplesToRecords.size() == 0 ) return null; TreeSet concordantSamples = new 
TreeSet(); - for ( Entry entry : samplesToRecords.entrySet() ) { + for ( Entry entry : samplesToRecords.entrySet() ) { concordantSamples.add(entry.getKey()); } @@ -39,4 +39,4 @@ public class NWayVenn implements ConcordanceType { } public String getInfoName() { return "NwayVenn"; } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java index 4cb68dfb9..e0e1af943 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.util.*; @@ -29,10 +29,10 @@ public class SNPGenotypeConcordance implements ConcordanceType { sample2 = iter.next(); } - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - VCFGenotypeCall call1 = samplesToRecords.get(sample1); - VCFGenotypeCall call2 = samplesToRecords.get(sample2); + Genotype call1 = samplesToRecords.get(sample1); + Genotype call2 = samplesToRecords.get(sample2); // the only reason they would be null is a lack of coverage if ( call1 == null || call2 == null ) { @@ -78,9 +78,9 @@ public class SNPGenotypeConcordance implements ConcordanceType { } // one is variant and the other is ref - else if ( call1.isPointGenotype() && call2.isVariant() && confidence1 >= Qscore ) + else if ( call1.isPointGenotype() && call2.isVariant(ref.getBase()) && confidence1 >= Qscore ) return "set1VariantSet2Ref"; - else if ( 
call2.isPointGenotype() && call1.isVariant() && confidence2 >= Qscore ) + else if ( call2.isPointGenotype() && call1.isVariant(ref.getBase()) && confidence2 >= Qscore ) return "set1RefSet2Variant"; return null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java index 3a4a97bea..fa14a3a20 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.concordance; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.StingException; import java.util.*; @@ -25,10 +25,10 @@ public class SimpleVenn implements ConcordanceType { sample2 = iter.next(); } - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { + public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - VCFGenotypeCall call1 = samplesToRecords.get(sample1); - VCFGenotypeCall call2 = samplesToRecords.get(sample2); + Genotype call1 = samplesToRecords.get(sample1); + Genotype call2 = samplesToRecords.get(sample2); if ( call1 == null && call2 == null ) return null; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java index df300de35..61ed7a878 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java @@ -25,8 +25,8 @@ public class VCFGenotypeEncoding { public VCFGenotypeEncoding(String baseString) { if ((baseString.length() == 
1)) { // are we an empty (no-call) genotype? - if (baseString.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) { - mBases = VCFGenotypeRecord.EMPTY_GENOTYPE; + if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { + mBases = VCFGenotypeRecord.EMPTY_ALLELE; mLength = 0; mType = TYPE.UNCALLED; } else if (!validBases(baseString)) { @@ -67,10 +67,14 @@ public class VCFGenotypeEncoding { } public boolean equals(Object obj) { - if (obj != null && (obj.getClass().equals(this.getClass()))) { + if ( obj == null ) + return false; + if ( obj instanceof VCFGenotypeEncoding ) { VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj; return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength); } + if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) + return true; return false; } @@ -84,7 +88,7 @@ public class VCFGenotypeEncoding { /** * dump the string representation of this genotype encoding * - * @return + * @return string representation */ public String toString() { StringBuilder builder = new StringBuilder(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 261a97724..f6c9ca700 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -16,8 +16,11 @@ import java.util.Map; *

*/ public class VCFGenotypeRecord implements Genotype { - // the symbol for a empty genotype - public static final String EMPTY_GENOTYPE = "."; + // the symbols for an empty genotype + public static final String EMPTY_GENOTYPE = "./."; + public static final String EMPTY_ALLELE = "."; + + public static final int MISSING_DEPTH = -1; // what kind of phasing this genotype has public enum PHASE { @@ -37,7 +40,7 @@ public class VCFGenotypeRecord implements Genotype { private final Map mFields = new HashMap(); // our sample name - private final String mSampleName; + private String mSampleName; /** * Create a VCF genotype record @@ -58,6 +61,10 @@ public class VCFGenotypeRecord implements Genotype { this.mRecord = record; } + public void setSampleName(String name) { + mSampleName = name; + } + /** * determine the phase of the genotype * @@ -98,6 +105,15 @@ public class VCFGenotypeRecord implements Genotype { return ( mFields.containsKey("GQ") ? Double.valueOf(mFields.get("GQ")) / 10.0 : 0.0); } + public int getReadCount() { + int depth = MISSING_DEPTH; + if ( mFields.containsKey("RD") ) + depth = Integer.valueOf(mFields.get("RD")); + else if ( mFields.containsKey("DP") ) + depth = Integer.valueOf(mFields.get("DP")); + return depth; + } + public GenomeLoc getLocation() { return mRecord != null ? mRecord.getLocation() : null; } @@ -157,7 +173,7 @@ public class VCFGenotypeRecord implements Genotype { boolean first = true; for (VCFGenotypeEncoding allele : mGenotypeAlleles) { if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - str += VCFGenotypeRecord.EMPTY_GENOTYPE; + str += VCFGenotypeRecord.EMPTY_ALLELE; else str += String.valueOf((altAlleles.contains(allele)) ? 
altAlleles.indexOf(allele) + 1 : 0); if (first) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index a329f5bca..9bf5f08bd 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -213,8 +213,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { List alleles = new ArrayList(); - alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE)); - alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE)); + alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); + alleles.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); VCFGenotypeRecord record = new VCFGenotypeRecord(sampleName, alleles, diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index 8d85f2c72..92da3b308 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -65,7 +65,9 @@ class VCFParameters { } public void addAlternateBase(VCFGenotypeEncoding base) { - if (!alternateBases.contains(base) && !base.toString().equals(String.valueOf(this.getReferenceBase()))) + if ( !alternateBases.contains(base) && + !base.toString().equals(String.valueOf(getReferenceBase())) && + !base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) alternateBases.add(base); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index f789affd2..2e4ce0482 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ 
b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -206,15 +206,20 @@ public class VCFReader implements Iterator, Iterable { // if we have genotyping data, we try and extract the genotype fields if (mHeader.hasGenotypingData()) { String mFormatString = tokens[index]; + String keyStrings[] = mFormatString.split(":"); List genotypeRecords = new ArrayList(); index++; String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(","); for (String str : mHeader.getGenotypeSamples()) { - if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE)) - genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); + genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); index++; } - return new VCFRecord(values, mFormatString, genotypeRecords); + VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords); + // associate the genotypes with this new record + for ( VCFGenotypeRecord gr : genotypeRecords ) + gr.setVCFRecord(vrec); + return vrec; + } return new VCFRecord(values); } @@ -231,11 +236,25 @@ public class VCFReader implements Iterator, Iterable { * @return a VCFGenotypeRecord */ public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) { + return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase); + } + + /** + * generate a VCF genotype record, given its format string, the genotype string, and allele info + * + * @param sampleName the sample name + * @param keyStrings the split format string for this record, which contains the keys for the genotype parameters + * @param genotypeString contains the phasing information, allele information, and values for genotype parameters + * @param altAlleles the alternate allele string array, which 
we index into based on the field parameters + * @param referenceBase the reference base + * + * @return a VCFGenotypeRecord + */ + public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) { // parameters to create the VCF genotype record Map tagToValue = new HashMap(); VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; List bases = new ArrayList(); - String keyStrings[] = formatString.split(":"); for (String key : keyStrings) { String parse; @@ -260,11 +279,13 @@ public class VCFReader implements Iterator, Iterable { if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length()); } + if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) ) + tagToValue.clear(); // catch some common errors, either there are too many field keys or there are two many field values - if (keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0)) + else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0)) throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected " + keyStrings.length + " but saw " + tagToValue.size() + ")"); - else if (genotypeString.length() > 0) + else if ( genotypeString.length() > 0 ) throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString + ". 
This most likely means that the format string is shorter then the value fields."); return new VCFGenotypeRecord(sampleName, bases, phase, tagToValue); @@ -280,8 +301,8 @@ public class VCFReader implements Iterator, Iterable { * @param bases the list of bases for this genotype call */ private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) { - bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_GENOTYPE)); + if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { + bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); } else { int alleleValue = Integer.valueOf(alleleNumber); // check to make sure the allele value is within bounds diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index c93fc025e..9b863108e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -85,11 +85,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { this.setQual(qual); this.setFilterString(filters); this.mInfoFields.putAll(infoFields); - mGenotypeFormatString = genotypeFormatString; - - // associate the genotypes with this Variation, then add them - for ( VCFGenotypeRecord rec : genotypeObjects ) - rec.setVCFRecord(this); + this.mGenotypeFormatString = genotypeFormatString; this.mGenotypeFields.addAll(genotypeObjects); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index c910e36f1..a140d1fbe 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -5,8 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import 
org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.*; import java.util.*; import java.util.Map.Entry; @@ -116,17 +115,13 @@ public class VCFUtils { int freqsSeen = 0; for ( RodVCF rod : rods ) { - List myGenotypes = rod.getGenotypes(); - for ( Genotype g : myGenotypes ) { - if ( !(g instanceof VCFGenotypeCall) ) - throw new StingException("Expected VCFGenotypeCall object but instead saw " + g.getClass().getSimpleName()); - + List myGenotypes = rod.getVCFGenotypeRecords(); + for ( VCFGenotypeRecord call : myGenotypes ) { // set the name to be the new uniquified name and add it to the list of genotypes - VCFGenotypeCall call = (VCFGenotypeCall)g; call.setSampleName(rodNamesToSampleNames.get(new Pair(rod.getName(), call.getSampleName()))); if ( params.getPosition() < 1 ) - params.setLocations(call.getLocation(), call.getReference()); - params.addGenotypeRecord(createVCFGenotypeRecord(params, call)); + params.setLocations(rod.getLocation(), call.getReference()); + params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord)); totalReadDepth += call.getReadCount(); } @@ -168,6 +163,39 @@ public class VCFUtils { params.getGenotypesRecords()); } + /** + * create the VCF genotype record + * + * @param params the VCF parameters object + * @param gtype the genotype + * @param vcfrecord the VCF record + * + * @return a VCFGenotypeRecord + */ + public static VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeRecord gtype, VCFRecord vcfrecord) { + Map map = new HashMap(); + + // calculate the RMS mapping qualities and the read depth + int readDepth = gtype.getReadCount(); + map.put("RD", String.valueOf(readDepth)); + params.addFormatItem("RD"); + double qual = 10.0 * 
gtype.getNegLog10PError(); + map.put("GQ", String.format("%.2f", qual)); + params.addFormatItem("GQ"); + + List alleles = createAlleleArray(gtype); + for (VCFGenotypeEncoding allele : alleles) { + params.addAlternateBase(allele); + } + + VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), + alleles, + VCFGenotypeRecord.PHASE.UNPHASED, + map); + record.setVCFRecord(vcfrecord); + return record; + } + /** * create the VCF genotype record * diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java index f46d65834..e8a6c50c6 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -61,7 +61,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { + " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -A SecondBaseSkew" + " -sample variant -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pileup_test_chr15.vcf" + " -vcf %s -L chr15:46347148"; - String expected_md5 = "fe7f6d2b48ecf2c1340b5de98a9d5614"; + String expected_md5 = "160a8e3826eb745bcfe2f463f73e1ec7"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1,Arrays.asList(expected_md5)); executeTest("Testing on locus with many indels", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index 712db17ce..f8a39a6f5 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest 
extends WalkerTest { public void testSimpleVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1, - Arrays.asList("2fc12b6f02f4cb589f2fd134e765d6b7")); + Arrays.asList("851b68004874f3a2e76d795e7401f8a0")); executeTest("testSimpleVenn", spec); } @@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSNPConcordance() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("142bcfcc6bb404cd4bd1a4624fa9a15e")); + Arrays.asList("7afb56b30257fe2d66bee7a029d75685")); executeTest("testSNPConcordance", spec); } @@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testNWayVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("9a5910137b6b9745f6e0c3ee711a6bfa")); + Arrays.asList("f452c04c600ad10c054f18b0c77b53d5")); executeTest("testNWayVenn", spec); } } \ No newline at end of file