Cleaning up the VCF code, adding lots of tests for a variety of edge cases. Two issues are still outstanding: updating the no call string with the standard 1000g decided on today, and fixing Eric's issue where not all the VCF sample names are present initially.
also: their, I hope your happy Eric, from now on I'll try not to flout my awesomest grammer in the future accept when I need to illicit a strong response :-) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1858 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b82c3b6040
commit
a69ea9b57c
|
|
@ -4,8 +4,11 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
|
@ -115,8 +118,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.length() != 1)
|
||||
for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
@ -132,8 +135,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.startsWith("I"))
|
||||
for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.getType() == VCFGenotypeEncoding.TYPE.INSERTION)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -149,8 +152,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.startsWith("D"))
|
||||
for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.getType() == VCFGenotypeEncoding.TYPE.DELETION)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -208,7 +211,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
public String getAlternateBases() {
|
||||
if (!this.isBiallelic())
|
||||
throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
|
||||
return this.mCurrentRecord.getAlternateAlleles().get(0);
|
||||
return this.mCurrentRecord.getAlternateAlleles().get(0).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -218,7 +221,10 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
return this.mCurrentRecord.getAlternateAlleles();
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles())
|
||||
list.add(enc.toString());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -240,7 +246,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
return mCurrentRecord.getAlternateAlleles().get(0).charAt(0);
|
||||
if (mCurrentRecord.getAlternateAlleles().size() != 1) throw new UnsupportedOperationException("We're not a biallelic VCF site");
|
||||
return (mCurrentRecord.getAlternateAlleles().get(0).toString()).charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -14,10 +14,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -103,7 +100,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
int[] alleleNames = {0, 1, 2, 3};
|
||||
double snpQual = 0.0;
|
||||
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
List<String> alts = new ArrayList<String>();
|
||||
List<VCFGenotypeEncoding> alts = new ArrayList<VCFGenotypeEncoding>();
|
||||
for (String name : vcfheader.getGenotypeSamples()) {
|
||||
ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
|
||||
if (rod != null) {
|
||||
|
|
@ -118,10 +115,10 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
if (!(rod instanceof VariantBackedByGenotype))
|
||||
throw new IllegalArgumentException("The passed in variant type must be backed by genotype data");
|
||||
Genotype genotype = ((VariantBackedByGenotype) rod).getCalledGenotype();
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
for (char base : genotype.getBases().toCharArray()) {
|
||||
alleles.add(String.valueOf(base));
|
||||
if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(String.valueOf(base));
|
||||
alleles.add(new VCFGenotypeEncoding(String.valueOf(base)));
|
||||
if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(new VCFGenotypeEncoding(String.valueOf(base)));
|
||||
}
|
||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(0));
|
||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(1));
|
||||
|
|
@ -141,9 +138,9 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
snpQual += av.getNegLog10PError();
|
||||
} else {
|
||||
Map<String, String> str = new HashMap<String, String>();
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
alleles.add(String.valueOf(ref.getBase()));
|
||||
alleles.add(String.valueOf(ref.getBase()));
|
||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
alleles.add(new VCFGenotypeEncoding(String.valueOf(ref.getBase())));
|
||||
alleles.add(new VCFGenotypeEncoding(String.valueOf(ref.getBase())));
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str));
|
||||
|
||||
numRefs++;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,122 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeEncoding
|
||||
* <p/>
|
||||
* basic encoding class for genotype fields in VCF
|
||||
*/
|
||||
public class VCFGenotypeEncoding {
|
||||
public enum TYPE {
|
||||
SINGLE_BASE,
|
||||
INSERTION,
|
||||
DELETION,
|
||||
UNCALLED
|
||||
}
|
||||
|
||||
// our length (0 for SINGLE_BASE), our bases, and our type
|
||||
private final int mLength;
|
||||
private final String mBases;
|
||||
private final TYPE mType;
|
||||
|
||||
// public constructor, that parses out the base string
|
||||
public VCFGenotypeEncoding(String baseString) {
|
||||
if ((baseString.length() == 1)) {
|
||||
// are we an empty (no-call) genotype?
|
||||
if (baseString.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) {
|
||||
mBases = VCFGenotypeRecord.EMPTY_GENOTYPE;
|
||||
mLength = 0;
|
||||
mType = TYPE.UNCALLED;
|
||||
} else if (!validBases(baseString)) {
|
||||
throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in");
|
||||
} else { // we're a valid base
|
||||
mBases = baseString.toUpperCase();
|
||||
mLength = 0;
|
||||
mType = TYPE.SINGLE_BASE;
|
||||
}
|
||||
} else { // deletion or insertion
|
||||
if (baseString.length() < 1 || (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I')) {
|
||||
throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)");
|
||||
}
|
||||
if (baseString.toUpperCase().charAt(0) == 'D') {
|
||||
mLength = Integer.valueOf(baseString.substring(1, baseString.length()));
|
||||
mBases = "";
|
||||
mType = TYPE.DELETION;
|
||||
} else { // we're an I
|
||||
mBases = baseString.substring(1, baseString.length()).toUpperCase();
|
||||
if (!validBases(mBases))
|
||||
throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString);
|
||||
mLength = mBases.length();
|
||||
mType = TYPE.INSERTION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getLength() {
|
||||
return mLength;
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
public TYPE getType() {
|
||||
return mType;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
if (obj != null && (obj.getClass().equals(this.getClass()))) {
|
||||
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
|
||||
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
// our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere
|
||||
// when the data changes underneath, altering this value).
|
||||
String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString();
|
||||
return str.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* dump the string representation of this genotype encoding
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
switch (mType) {
|
||||
case SINGLE_BASE:
|
||||
case UNCALLED:
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case INSERTION:
|
||||
builder.append("I");
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case DELETION:
|
||||
builder.append("D");
|
||||
builder.append(mLength);
|
||||
break;
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* ensure that string contains valid bases
|
||||
*
|
||||
* @param bases the bases to check
|
||||
*
|
||||
* @return true if they're all either A,C,G,T; false otherwise
|
||||
*/
|
||||
private static boolean validBases(String bases) {
|
||||
for (char c : bases.toUpperCase().toCharArray()) {
|
||||
if (c != 'A' && c != 'C' && c != 'G' && c != 'T')
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -27,7 +27,7 @@ public class VCFGenotypeRecord {
|
|||
private PHASE mPhaseType;
|
||||
|
||||
// our bases(s)
|
||||
private final List<String> mGenotypeAlleles = new ArrayList<String>();
|
||||
private final List<VCFGenotypeEncoding> mGenotypeAlleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
||||
// our mapping of the format mFields to values
|
||||
private final Map<String, String> mFields = new HashMap<String, String>();
|
||||
|
|
@ -43,7 +43,7 @@ public class VCFGenotypeRecord {
|
|||
* @param phasing
|
||||
* @param otherFlags
|
||||
*/
|
||||
public VCFGenotypeRecord(String sampleName, List<String> genotypes, PHASE phasing, Map<String, String> otherFlags) {
|
||||
public VCFGenotypeRecord(String sampleName, List<VCFGenotypeEncoding> genotypes, PHASE phasing, Map<String, String> otherFlags) {
|
||||
this.mSampleName = sampleName;
|
||||
if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes);
|
||||
this.mPhaseType = phasing;
|
||||
|
|
@ -78,7 +78,7 @@ public class VCFGenotypeRecord {
|
|||
return mSampleName;
|
||||
}
|
||||
|
||||
public List<String> getAlleles() {
|
||||
public List<VCFGenotypeEncoding> getAlleles() {
|
||||
return mGenotypeAlleles;
|
||||
}
|
||||
|
||||
|
|
@ -86,10 +86,10 @@ public class VCFGenotypeRecord {
|
|||
return mFields;
|
||||
}
|
||||
|
||||
public String toGenotypeString(List<String> altAlleles) {
|
||||
public String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
|
||||
String str = "";
|
||||
boolean first = true;
|
||||
for (String allele : mGenotypeAlleles) {
|
||||
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
||||
str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
|
||||
if (first) {
|
||||
switch (mPhaseType) {
|
||||
|
|
|
|||
|
|
@ -199,8 +199,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
map.put("GQ", String.format("%.2f", qual));
|
||||
params.addFormatItem("GQ");
|
||||
|
||||
List<String> alleles = createAlleleArray(gtype);
|
||||
for (String allele : alleles) {
|
||||
List<VCFGenotypeEncoding> alleles = createAlleleArray(gtype);
|
||||
for (VCFGenotypeEncoding allele : alleles) {
|
||||
params.addAlternateBase(allele);
|
||||
}
|
||||
|
||||
|
|
@ -218,10 +218,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @return a list of string representing the string array of alleles
|
||||
*/
|
||||
private List<String> createAlleleArray(Genotype gtype) {
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
private List<VCFGenotypeEncoding> createAlleleArray(Genotype gtype) {
|
||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
for (char allele : gtype.getBases().toCharArray()) {
|
||||
alleles.add(String.valueOf(allele));
|
||||
alleles.add(new VCFGenotypeEncoding(String.valueOf(allele)));
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ class VCFParameters {
|
|||
private boolean initialized = false;
|
||||
private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
|
||||
private List<String> formatList = new ArrayList<String>();
|
||||
private List<String> alternateBases = new ArrayList<String>();
|
||||
private List<VCFGenotypeEncoding> alternateBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
||||
public void setLocations(GenomeLoc location, char refBase) {
|
||||
// if we haven't set it up, we initialize the object
|
||||
|
|
@ -64,12 +64,12 @@ class VCFParameters {
|
|||
formatList.add(item);
|
||||
}
|
||||
|
||||
public void addAlternateBase(String base) {
|
||||
if (!alternateBases.contains(String.valueOf(base)) && !base.equals(String.valueOf(this.getReferenceBase())))
|
||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||
if (!alternateBases.contains(base) && !base.toString().equals(String.valueOf(this.getReferenceBase())))
|
||||
alternateBases.add(base);
|
||||
}
|
||||
|
||||
public List<String> getAlternateBases() {
|
||||
public List<VCFGenotypeEncoding> getAlternateBases() {
|
||||
return alternateBases;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -216,7 +216,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
// parameters to create the VCF genotype record
|
||||
Map<String, String> tagToValue = new HashMap<String, String>();
|
||||
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
||||
List<String> bases = new ArrayList<String>();
|
||||
List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();
|
||||
String keyStrings[] = formatString.split(":");
|
||||
|
||||
for (String key : keyStrings) {
|
||||
|
|
@ -262,15 +262,15 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @param referenceBase the reference base
|
||||
* @param bases the list of bases for this genotype call
|
||||
*/
|
||||
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<String> bases) {
|
||||
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
|
||||
int alleleValue = Integer.valueOf(alleleNumber);
|
||||
// check to make sure the allele value is within bounds
|
||||
if (alleleValue < 0 || alleleValue > altAlleles.length)
|
||||
throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");
|
||||
if (alleleValue == 0)
|
||||
bases.add(String.valueOf(referenceBase));
|
||||
bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));
|
||||
else
|
||||
bases.add(altAlleles[alleleValue - 1]);
|
||||
bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/** the basic VCF record type */
|
||||
/**
|
||||
* the basic VCF record type
|
||||
*/
|
||||
public class VCFRecord {
|
||||
// commonly used strings that are in the standard
|
||||
public static final String FORMAT_FIELD_SEPERATOR = ":";
|
||||
|
|
@ -24,7 +26,7 @@ public class VCFRecord {
|
|||
// our id; set to '.' if not available
|
||||
private String mID;
|
||||
// the alternate bases
|
||||
private final List<String> mAlts = new ArrayList<String>();
|
||||
private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();
|
||||
// our qual value
|
||||
private double mQual;
|
||||
// our filter string
|
||||
|
|
@ -67,7 +69,7 @@ public class VCFRecord {
|
|||
String contig,
|
||||
int position,
|
||||
String ID,
|
||||
List<String> altBases,
|
||||
List<VCFGenotypeEncoding> altBases,
|
||||
double qual,
|
||||
String filters,
|
||||
Map<String, String> infoFields,
|
||||
|
|
@ -77,7 +79,7 @@ public class VCFRecord {
|
|||
this.mChrome = contig;
|
||||
this.setPosition(position);
|
||||
this.mID = ID;
|
||||
for (String alt : altBases)
|
||||
for (VCFGenotypeEncoding alt : altBases)
|
||||
this.addAlternateBase(alt);
|
||||
this.setQual(qual);
|
||||
this.setFilterString(filters);
|
||||
|
|
@ -121,7 +123,7 @@ public class VCFRecord {
|
|||
case ALT:
|
||||
String values[] = columnValues.get(val).split(",");
|
||||
for (String alt : values)
|
||||
addAlternateBase(alt);
|
||||
addAlternateBase(new VCFGenotypeEncoding(alt));
|
||||
break;
|
||||
case QUAL:
|
||||
this.setQual(Double.valueOf(columnValues.get(val)));
|
||||
|
|
@ -156,18 +158,24 @@ public class VCFRecord {
|
|||
return (mGenotypeFields.size() > 0);
|
||||
}
|
||||
|
||||
/** @return the string for the chromosome that this VCF record is associated with */
|
||||
/**
|
||||
* @return the string for the chromosome that this VCF record is associated with
|
||||
*/
|
||||
public String getChromosome() {
|
||||
return this.mChrome;
|
||||
}
|
||||
|
||||
|
||||
/** @return this VCF records position on the specified chromosome */
|
||||
/**
|
||||
* @return this VCF records position on the specified chromosome
|
||||
*/
|
||||
public long getPosition() {
|
||||
return this.mPosition;
|
||||
}
|
||||
|
||||
/** @return the ID value for this record */
|
||||
/**
|
||||
* @return the ID value for this record
|
||||
*/
|
||||
public String getID() {
|
||||
return this.mID;
|
||||
}
|
||||
|
|
@ -186,7 +194,7 @@ public class VCFRecord {
|
|||
*
|
||||
* @return an array of strings representing the alt alleles, or null if there are none
|
||||
*/
|
||||
public List<String> getAlternateAlleles() {
|
||||
public List<VCFGenotypeEncoding> getAlternateAlleles() {
|
||||
return this.mAlts;
|
||||
}
|
||||
|
||||
|
|
@ -194,7 +202,9 @@ public class VCFRecord {
|
|||
return getAlternateAlleles().size() > 0;
|
||||
}
|
||||
|
||||
/** @return the phred-scaled quality score */
|
||||
/**
|
||||
* @return the phred-scaled quality score
|
||||
*/
|
||||
public double getQual() {
|
||||
return this.mQual;
|
||||
}
|
||||
|
|
@ -206,7 +216,7 @@ public class VCFRecord {
|
|||
*/
|
||||
public String[] getFilteringCodes() {
|
||||
if (mFilterString == null) return new String[]{"0"};
|
||||
return this.mFilterString.split(";");
|
||||
return this.mFilterString.split(FILTER_CODE_SEPERATOR);
|
||||
}
|
||||
|
||||
public boolean hasFilteringCodes() {
|
||||
|
|
@ -227,7 +237,9 @@ public class VCFRecord {
|
|||
return this.mInfoFields;
|
||||
}
|
||||
|
||||
/** @return the number of columnsof data we're storing */
|
||||
/**
|
||||
* @return the number of columns of data we're storing
|
||||
*/
|
||||
public int getColumnCount() {
|
||||
if (this.hasGenotypeData()) return mGenotypeFields.size() + VCFHeader.HEADER_FIELDS.values().length;
|
||||
return VCFHeader.HEADER_FIELDS.values().length;
|
||||
|
|
@ -242,7 +254,9 @@ public class VCFRecord {
|
|||
return this.mGenotypeFields;
|
||||
}
|
||||
|
||||
/** @return a List of the sample names */
|
||||
/**
|
||||
* @return a List of the sample names
|
||||
*/
|
||||
public String[] getSampleNames() {
|
||||
String names[] = new String[mGenotypeFields.size()];
|
||||
int index = 0;
|
||||
|
|
@ -287,24 +301,26 @@ public class VCFRecord {
|
|||
this.mFilterString = mFilterString;
|
||||
}
|
||||
|
||||
public void addGenotypeFields(VCFGenotypeRecord mGenotypeFields) {
|
||||
public void addGenotypeField(VCFGenotypeRecord mGenotypeFields) {
|
||||
this.mGenotypeFields.add(mGenotypeFields);
|
||||
}
|
||||
|
||||
public void addAlternateBase(String base) {
|
||||
if (base.length() == 1) {
|
||||
char nuc = (char) ((base.charAt(0) > 96) ? base.charAt(0) - 32 : base.charAt(0));
|
||||
if (nuc != 'A' && nuc != 'C' && nuc != 'T' && nuc != 'G' && nuc != '.')
|
||||
throw new IllegalArgumentException("Alternate base must be either A,C,T,G,. or if an indel it must contain length information: " + base);
|
||||
} else {
|
||||
// we must be an indel, check that the first character is I or D
|
||||
char nuc = (char) ((base.charAt(0) > 96) ? base.charAt(0) - 32 : base.charAt(0));
|
||||
if (nuc != 'I' && nuc != 'D')
|
||||
throw new IllegalArgumentException("Alternate bases of length greater then one must be an indel: " + base);
|
||||
}
|
||||
this.mAlts.add(base);
|
||||
/**
|
||||
* add an alternate base to our alternate base list. All bases are uppercased
|
||||
* before being added to the list.
|
||||
*
|
||||
* @param base the base to add
|
||||
*/
|
||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||
if (!mAlts.contains(base)) mAlts.add(base);
|
||||
}
|
||||
|
||||
/**
|
||||
* add an info field to the record
|
||||
*
|
||||
* @param key the key, from the spec or a user created key
|
||||
* @param value it's value as a string
|
||||
*/
|
||||
public void addInfoField(String key, String value) {
|
||||
this.mInfoFields.put(key, value);
|
||||
}
|
||||
|
|
@ -312,31 +328,29 @@ public class VCFRecord {
|
|||
/**
|
||||
* the generation of a string representation, which is used by the VCF writer
|
||||
*
|
||||
* @param header the VCF header for this VCF Record
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringRepresentation(VCFHeader header) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
||||
builder.append(getChromosome() + FIELD_SEPERATOR);
|
||||
builder.append(getPosition() + FIELD_SEPERATOR);
|
||||
builder.append(getID() + FIELD_SEPERATOR);
|
||||
builder.append(getReferenceBase() + FIELD_SEPERATOR);
|
||||
builder.append(getChromosome());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getPosition());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getID());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getReferenceBase());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
String alts = "";
|
||||
for (String str : this.getAlternateAlleles()) alts += str + ",";
|
||||
for (VCFGenotypeEncoding str : this.getAlternateAlleles()) alts += str.toString() + ",";
|
||||
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
|
||||
builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
String info = "";
|
||||
for (String str : this.getInfoValues().keySet()) {
|
||||
if (str.equals(EMPTY_INFO_FIELD))
|
||||
info = EMPTY_INFO_FIELD;
|
||||
else
|
||||
info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR;
|
||||
}
|
||||
|
||||
if (info.length() > 1) builder.append(info.substring(0, info.length() - 1));
|
||||
else builder.append(info);
|
||||
builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, getQual()));
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()));
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(createInfoString());
|
||||
|
||||
if (this.hasGenotypeData()) {
|
||||
addGenotypeData(builder, header);
|
||||
|
|
@ -344,6 +358,22 @@ public class VCFRecord {
|
|||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* create the info string
|
||||
*
|
||||
* @return a string representing the infomation fields
|
||||
*/
|
||||
protected String createInfoString() {
|
||||
String info = "";
|
||||
for (String str : this.getInfoValues().keySet()) {
|
||||
if (str.equals(EMPTY_INFO_FIELD))
|
||||
return EMPTY_INFO_FIELD;
|
||||
else
|
||||
info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR;
|
||||
}
|
||||
return (info.contains(INFO_FIELD_SEPERATOR)) ? info.substring(0, info.lastIndexOf(INFO_FIELD_SEPERATOR)) : info;
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
|
|
@ -358,9 +388,7 @@ public class VCFRecord {
|
|||
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());
|
||||
|
||||
for (String genotype : header.getGenotypeSamples()) {
|
||||
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
|
||||
if (gMap.containsKey(genotype)) {
|
||||
VCFGenotypeRecord rec = gMap.get(genotype);
|
||||
if (!rec.toGenotypeString(this.mAlts).equals(""))
|
||||
|
|
@ -386,7 +414,6 @@ public class VCFRecord {
|
|||
* compare two VCF records
|
||||
*
|
||||
* @param other the other VCF record
|
||||
*
|
||||
* @return true if they're equal
|
||||
*/
|
||||
public boolean equals(VCFRecord other) {
|
||||
|
|
@ -406,7 +433,6 @@ public class VCFRecord {
|
|||
* create a genotype mapping from a list and their sample names
|
||||
*
|
||||
* @param list a list of genotype samples
|
||||
*
|
||||
* @return a mapping of the sample name to VCF genotype record
|
||||
*/
|
||||
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest {
|
|||
@Test
|
||||
public void testToString() {
|
||||
// slightly altered line, due to map ordering
|
||||
String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,151 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeEncodingTest
|
||||
* <p/>
|
||||
* test the VCFGenotypeEncoding class
|
||||
*/
|
||||
public class VCFGenotypeEncodingTest extends BaseTest {
|
||||
@Test
|
||||
public void testDecodingSingle() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
Assert.assertTrue("A".equals(enc.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue("C".equals(enc2.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("G");
|
||||
Assert.assertTrue("G".equals(enc3.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc4 = new VCFGenotypeEncoding("T");
|
||||
Assert.assertTrue("T".equals(enc4.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc5 = new VCFGenotypeEncoding("a");
|
||||
Assert.assertTrue("A".equals(enc5.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc6 = new VCFGenotypeEncoding("c");
|
||||
Assert.assertTrue("C".equals(enc6.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc7 = new VCFGenotypeEncoding("g");
|
||||
Assert.assertTrue("G".equals(enc7.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc8 = new VCFGenotypeEncoding("t");
|
||||
Assert.assertTrue("T".equals(enc8.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testDecodingSingleBadBase() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("E");
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testDecodingSingleWrongBase() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("I");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidIndel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGGC");
|
||||
Assert.assertEquals(4, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals("AGGC"));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadIndel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGRC");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidDel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("D40");
|
||||
Assert.assertEquals(40, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals(""));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadDel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("DAGCT");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidNoCall() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding(".");
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals("."));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.UNCALLED, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadNoCall() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("..");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEquals() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding("D40");
|
||||
enc2 = new VCFGenotypeEncoding("D40");
|
||||
enc3 = new VCFGenotypeEncoding("D41");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding("IAAC");
|
||||
enc2 = new VCFGenotypeEncoding("IAAC");
|
||||
enc3 = new VCFGenotypeEncoding("IACG");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding(".");
|
||||
enc2 = new VCFGenotypeEncoding(".");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashCode() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding("D40");
|
||||
enc2 = new VCFGenotypeEncoding("D40");
|
||||
enc3 = new VCFGenotypeEncoding("D41");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding("IAAC");
|
||||
enc2 = new VCFGenotypeEncoding("IAAC");
|
||||
enc3 = new VCFGenotypeEncoding("IACG");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding(".");
|
||||
enc2 = new VCFGenotypeEncoding(".");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
}
|
||||
}
|
||||
|
|
@ -37,12 +37,12 @@ public class VCFReaderTest extends BaseTest {
|
|||
public void testBasicParsing() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:2:3:4";
|
||||
String altAlleles[] = {"A","C","G","T"};
|
||||
char referenceBase = 'N';
|
||||
String altAlleles[] = {"A","G","T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Assert.assertEquals("C",rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1).toString());
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals("2"));
|
||||
|
|
@ -58,12 +58,12 @@ public class VCFReaderTest extends BaseTest {
|
|||
public void testMissingFieldParsing() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:::4";
|
||||
String altAlleles[] = {"A","C","G","T"};
|
||||
char referenceBase = 'N';
|
||||
String altAlleles[] = {"A","G","T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Assert.assertEquals("C",rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1).toString());
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
|
|
@ -78,12 +78,12 @@ public class VCFReaderTest extends BaseTest {
|
|||
public void testMissingAllFields() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:::";
|
||||
String altAlleles[] = {"A","C","G","T"};
|
||||
char referenceBase = 'N';
|
||||
String altAlleles[] = {"A","G","T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Assert.assertEquals("C",rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1).toString());
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
|
|
|
|||
|
|
@ -11,36 +11,147 @@ import java.util.Map;
|
|||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VCFRecordTest
|
||||
*
|
||||
* test the basic functionality of the vcf record
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFRecordTest
|
||||
* <p/>
|
||||
* test the basic functionality of the vcf record
|
||||
*/
|
||||
public class VCFRecordTest extends BaseTest {
|
||||
|
||||
private VCFRecord makeFakeVCFRecord() {
|
||||
List<String> altBases = new ArrayList<String>();
|
||||
altBases.add("C");
|
||||
altBases.add("D1");
|
||||
Map<String,String> infoFields = new HashMap<String,String>();
|
||||
infoFields.put("DP","50");
|
||||
/**
|
||||
* create a fake VCF record
|
||||
*
|
||||
* @return a VCFRecord
|
||||
*/
|
||||
private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) {
|
||||
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
altBases.add(new VCFGenotypeEncoding("C"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<String, String> keyValues = new HashMap<String,String>();
|
||||
keyValues.put("AA","2");
|
||||
List<String> Alleles = new ArrayList<String>();
|
||||
Alleles.add("A");
|
||||
genotypeObjects.add(new VCFGenotypeRecord("SampleName", Alleles, VCFGenotypeRecord.PHASE.PHASED, keyValues));
|
||||
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",genotypeObjects);
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
return new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake VCF genotype record
|
||||
*
|
||||
* @param name the name of the sample
|
||||
* @param Allele1 the first allele
|
||||
* @param Allele2 the second allele
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
private static VCFGenotypeRecord createGenotype(String name, String Allele1, String Allele2) {
|
||||
Map<String, String> keyValues = new HashMap<String, String>();
|
||||
keyValues.put("AA", "2");
|
||||
List<VCFGenotypeEncoding> Alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
Alleles.add(new VCFGenotypeEncoding(Allele1));
|
||||
Alleles.add(new VCFGenotypeEncoding(Allele2));
|
||||
return new VCFGenotypeRecord(name, Alleles, VCFGenotypeRecord.PHASE.PHASED, keyValues);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddReduntantAlts() {
|
||||
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
altBases.add(new VCFGenotypeEncoding("C"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
VCFRecord rec = new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
|
||||
Assert.assertEquals(2, rec.getAlternateAlleles().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetOneGenotype() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(1, genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
rec.addGenotypeField(createGenotype("sample2", "C", "A"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(2, genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
|
||||
|
||||
// assert the second one
|
||||
Assert.assertTrue(genotypeObjects.get(1).getSampleName().equals("sample2"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(1).getAlleles().size());
|
||||
Assert.assertEquals("C", genotypeObjects.get(1).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(1).getAlleles().get(1).toString());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateInfoString() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
Assert.assertTrue(rec.createInfoString().equals("."));
|
||||
infoFields.put("DP", "50");
|
||||
VCFRecord rec2 = makeFakeVCFRecord(infoFields);
|
||||
Assert.assertTrue(rec2.createInfoString().equals("DP=50"));
|
||||
rec2.addInfoField("AB", "CD");
|
||||
Assert.assertTrue(rec2.createInfoString().equals("DP=50;AB=CD") || rec2.createInfoString().equals("AB=CD;DP=50"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
VCFRecord rec = makeFakeVCFRecord();
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(1,genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("SampleName"));
|
||||
public void testAddAlts() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
Assert.assertEquals(3,rec.getAlternateAlleles().size());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake header of known quantity
|
||||
*
|
||||
* @return a fake VCF header
|
||||
*/
|
||||
public static VCFHeader createFakeHeader() {
|
||||
Map<String, String> metaData = new HashMap();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
metaData.put("format", "VCRv3.2"); // required
|
||||
metaData.put("two", "2");
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.add("sample1");
|
||||
return new VCFHeader(metaData, additionalColumns);
|
||||
}
|
||||
|
||||
private static final String stringRep = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tDP=50\tGT:AA\t0|0:2";
|
||||
private static final String stringRep2 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
|
||||
//private static final String stringRep3 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
|
||||
|
||||
@Test
|
||||
public void testStringRepresentation() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
infoFields.put("DP", "50");
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
String rep = rec.toStringRepresentation(createFakeHeader());
|
||||
Assert.assertTrue(stringRep.equals(rep));
|
||||
rec.addInfoField("AB", "CD");
|
||||
String rep2 = rec.toStringRepresentation(createFakeHeader());
|
||||
Assert.assertTrue(stringRep2.equals(rep2));
|
||||
//rec.addGenotypeField(createGenotype("sample3","A","D12"));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class VCFWriterTest extends BaseTest {
|
|||
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
||||
@Test
|
||||
public void testBasicWriteAndRead() {
|
||||
VCFHeader header = createFakeHeader();
|
||||
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
||||
VCFWriter writer = new VCFWriter(header,fakeVCFFile);
|
||||
writer.addRecord(createVCFRecord(header));
|
||||
writer.addRecord(createVCFRecord(header));
|
||||
|
|
@ -45,7 +45,7 @@ public class VCFWriterTest extends BaseTest {
|
|||
* create a fake header of known quantity
|
||||
* @return a fake VCF header
|
||||
*/
|
||||
private VCFHeader createFakeHeader() {
|
||||
public static VCFHeader createFakeHeader(Map<String, String> metaData, List<String> additionalColumns) {
|
||||
metaData.put("format", "VCRv3.2"); // required
|
||||
metaData.put("two", "2");
|
||||
additionalColumns.add("FORMAT");
|
||||
|
|
@ -60,9 +60,9 @@ public class VCFWriterTest extends BaseTest {
|
|||
* @return a VCFRecord
|
||||
*/
|
||||
private VCFRecord createVCFRecord(VCFHeader header) {
|
||||
List<String> altBases = new ArrayList<String>();
|
||||
altBases.add("C");
|
||||
altBases.add("D1");
|
||||
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
altBases.add(new VCFGenotypeEncoding("C"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
Map<String,String> infoFields = new HashMap<String,String>();
|
||||
infoFields.put("DP","50");
|
||||
|
||||
|
|
@ -71,9 +71,9 @@ public class VCFWriterTest extends BaseTest {
|
|||
Map<String,String> str = new HashMap<String,String>();
|
||||
str.put("bb","0");
|
||||
|
||||
List<String> myAlleles = new ArrayList<String>();
|
||||
myAlleles.add("C");
|
||||
myAlleles.add("D1");
|
||||
List<VCFGenotypeEncoding> myAlleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
myAlleles.add(new VCFGenotypeEncoding("C"));
|
||||
myAlleles.add(new VCFGenotypeEncoding("D1"));
|
||||
gt.add(new VCFGenotypeRecord(name, myAlleles, VCFGenotypeRecord.PHASE.PHASED, str));
|
||||
}
|
||||
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
|
||||
|
|
|
|||
Loading…
Reference in New Issue