Fixed the sample mix-up bug Kiran discovered, and added a unit test in the VCF reader class (Thanks for the good example files Kiran). Also renamed the toStringRepresentation function to toStringEncoding, and added a matching method in VCFGenotypeRecord.
Updated the integration tests that were failing to due to different ordering of genotyping entries in VCF, I'll check in the VCF diff tool I wrote when I get a cycle or two. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2092 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b4babb82eb
commit
6ba1f3321d
|
|
@ -59,7 +59,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
if (this.mCurrentRecord != null)
|
if (this.mCurrentRecord != null)
|
||||||
return this.mCurrentRecord.toStringRepresentation(mReader.getHeader());
|
return this.mCurrentRecord.toStringEncoding(mReader.getHeader());
|
||||||
else
|
else
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
|
||||||
|
|
||||||
import java.io.FileReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class VCFMixedUp extends RefWalker<Integer, Integer> {
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
|
||||||
if (rod instanceof RodVCF) {
|
|
||||||
for (String rodfile : this.getToolkit().getArguments().RODBindings) {
|
|
||||||
String[] rodpieces = rodfile.split(",");
|
|
||||||
|
|
||||||
if (rodpieces[2].contains(".vcf")) {
|
|
||||||
try {
|
|
||||||
FileReader reader = new FileReader(rodpieces[2]);
|
|
||||||
BufferedReader breader = new BufferedReader(reader);
|
|
||||||
|
|
||||||
ArrayList<String> header = new ArrayList<String>();
|
|
||||||
ArrayList<String> variant = new ArrayList<String>();
|
|
||||||
|
|
||||||
String line;
|
|
||||||
while ((line = breader.readLine()) != null) {
|
|
||||||
String[] pieces = line.split("\\s+");
|
|
||||||
|
|
||||||
if (line.contains("##")) {
|
|
||||||
} else if (line.contains("#CHROM")) {
|
|
||||||
for (int i = 0; i < pieces.length; i++) {
|
|
||||||
header.add(i, pieces[i]);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < pieces.length; i++) {
|
|
||||||
variant.add(i, pieces[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < pieces.length; i++) {
|
|
||||||
if (i < 9 || header.get(i).contains("NA12892") || header.get(i).contains("NA10851")) {
|
|
||||||
out.printf("%s => %s\n", header.get(i), variant.get(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
RodVCF vcfrod = (RodVCF) rod;
|
|
||||||
VCFRecord record = vcfrod.mCurrentRecord;
|
|
||||||
|
|
||||||
out.println();
|
|
||||||
|
|
||||||
for (VCFGenotypeRecord gr : record.getVCFGenotypeRecords()) {
|
|
||||||
if (gr.getSampleName().equalsIgnoreCase("NA12892") || gr.getSampleName().equalsIgnoreCase("NA10851")) {
|
|
||||||
out.printf("%s: %s:%s:%s:%s\n", gr.getSampleName(),
|
|
||||||
gr.toGenotypeString(record.getAlternateAlleles()),
|
|
||||||
gr.getFields().get("GQ"),
|
|
||||||
gr.getFields().get("DP"),
|
|
||||||
gr.getFields().get("HQ"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
|
@ -9,7 +8,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -64,7 +62,7 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
|
||||||
initializeWriter();
|
initializeWriter();
|
||||||
}
|
}
|
||||||
|
|
||||||
//out.println(record.toStringRepresentation(vcfrod.getHeader()));
|
//out.println(record.toStringEncoding(vcfrod.getHeader()));
|
||||||
|
|
||||||
records.add(record);
|
records.add(record);
|
||||||
}
|
}
|
||||||
|
|
@ -117,7 +115,7 @@ public class VCFSubsetWalker extends RefWalker<ArrayList<VCFRecord>, VCFWriter>
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.addRecord(subset);
|
writer.addRecord(subset);
|
||||||
} else {
|
} else {
|
||||||
out.println(subset.toStringRepresentation(vheader));
|
out.println(subset.toStringEncoding(vheader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,7 @@ public class VCFGenotypeRecord {
|
||||||
return mFields;
|
return mFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
|
private String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
|
||||||
String str = "";
|
String str = "";
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
||||||
|
|
@ -133,4 +133,24 @@ public class VCFGenotypeRecord {
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* output a string representation of the VCFGenotypeRecord, given the alternate alleles
|
||||||
|
*
|
||||||
|
* @param altAlleles the alternate alleles, needed for toGenotypeString()
|
||||||
|
*
|
||||||
|
* @return a string
|
||||||
|
*/
|
||||||
|
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles) {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append(toGenotypeString(altAlleles));
|
||||||
|
boolean first = true;
|
||||||
|
for (String field : mFields.keySet()) {
|
||||||
|
if (mFields.get(field).equals("")) continue;
|
||||||
|
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||||
|
builder.append(mFields.get(field));
|
||||||
|
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
||||||
private VCFHeader mHeader = null;
|
private VCFHeader mHeader = null;
|
||||||
private String mSource;
|
private String mSource;
|
||||||
private String mReferenceName;
|
private String mReferenceName;
|
||||||
private final Set<String> mSampleNames = new HashSet<String>();
|
private final Set<String> mSampleNames = new LinkedHashSet<String>();
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
|
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ public class VCFHeader {
|
||||||
private final Map<String, String> mMetaData = new HashMap<String, String>();
|
private final Map<String, String> mMetaData = new HashMap<String, String>();
|
||||||
|
|
||||||
// the list of auxillary tags
|
// the list of auxillary tags
|
||||||
private final Set<String> mGenotypeSampleNames = new HashSet<String>();
|
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||||
|
|
||||||
// the character string that indicates meta data
|
// the character string that indicates meta data
|
||||||
public static final String METADATA_INDICATOR = "##";
|
public static final String METADATA_INDICATOR = "##";
|
||||||
|
|
|
||||||
|
|
@ -124,9 +124,8 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
* @return a VCF Header created from the list of stinrgs
|
* @return a VCF Header created from the list of stinrgs
|
||||||
*/
|
*/
|
||||||
protected VCFHeader createHeader(List<String> headerStrings) {
|
protected VCFHeader createHeader(List<String> headerStrings) {
|
||||||
|
|
||||||
Map<String, String> metaData = new HashMap<String, String>();
|
Map<String, String> metaData = new HashMap<String, String>();
|
||||||
Set<String> auxTags = new HashSet<String>();
|
Set<String> auxTags = new LinkedHashSet<String>();
|
||||||
// iterate over all the passed in strings
|
// iterate over all the passed in strings
|
||||||
for (String str : headerStrings) {
|
for (String str : headerStrings) {
|
||||||
Matcher matcher = pMeta.matcher(str);
|
Matcher matcher = pMeta.matcher(str);
|
||||||
|
|
|
||||||
|
|
@ -365,7 +365,7 @@ public class VCFRecord {
|
||||||
* @param header the VCF header for this VCF Record
|
* @param header the VCF header for this VCF Record
|
||||||
* @return a string
|
* @return a string
|
||||||
*/
|
*/
|
||||||
public String toStringRepresentation(VCFHeader header) {
|
public String toStringEncoding(VCFHeader header) {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
|
|
||||||
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
||||||
|
|
@ -425,13 +425,7 @@ public class VCFRecord {
|
||||||
builder.append(FIELD_SEPERATOR);
|
builder.append(FIELD_SEPERATOR);
|
||||||
if (gMap.containsKey(genotype)) {
|
if (gMap.containsKey(genotype)) {
|
||||||
VCFGenotypeRecord rec = gMap.get(genotype);
|
VCFGenotypeRecord rec = gMap.get(genotype);
|
||||||
if (!rec.toGenotypeString(this.mAlts).equals(""))
|
builder.append(rec.toStringEncoding(this.mAlts));
|
||||||
builder.append(rec.toGenotypeString(this.mAlts));
|
|
||||||
for (String s : rec.getFields().keySet()) {
|
|
||||||
if (rec.getFields().get(s).equals("")) continue;
|
|
||||||
builder.append(GENOTYPE_FIELD_SEPERATOR);
|
|
||||||
builder.append(rec.getFields().get(s));
|
|
||||||
}
|
|
||||||
gMap.remove(genotype);
|
gMap.remove(genotype);
|
||||||
} else {
|
} else {
|
||||||
builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);
|
builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ public class VCFWriter {
|
||||||
* @param record the record to output
|
* @param record the record to output
|
||||||
*/
|
*/
|
||||||
public void addRecord(VCFRecord record) {
|
public void addRecord(VCFRecord record) {
|
||||||
String vcfString = record.toStringRepresentation(mHeader);
|
String vcfString = record.toStringEncoding(mHeader);
|
||||||
try {
|
try {
|
||||||
mWriter.write(vcfString + "\n");
|
mWriter.write(vcfString + "\n");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
||||||
|
|
@ -99,13 +99,13 @@ public class RodVCFTest extends BaseTest {
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
VCFRecord rec1 = reader.next();
|
VCFRecord rec1 = reader.next();
|
||||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||||
if (!rec1.toStringRepresentation(mHeader).equals(rec2.toStringRepresentation(mHeader))) {
|
if (!rec1.toStringEncoding(mHeader).equals(rec2.toStringEncoding(mHeader))) {
|
||||||
fail("VCF record rec1.toString() != rec2.toString()");
|
fail("VCF record rec1.toStringEncoding() != rec2.toStringEncoding()");
|
||||||
}
|
}
|
||||||
// verify the first line too
|
// verify the first line too
|
||||||
if (first) {
|
if (first) {
|
||||||
if (!firstLine.equals(rec1.toStringRepresentation(mHeader) + "\n")) {
|
if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) {
|
||||||
fail("VCF record rec1.toString() != expected string :\n" + rec1.toStringRepresentation(mHeader) + firstLine);
|
fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + firstLine);
|
||||||
}
|
}
|
||||||
first = false;
|
first = false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsNotAsking1() {
|
public void testHasAnnotsNotAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("4d7e6670ec71e3b7065ca7e25855189d"));
|
Arrays.asList("0ff01251afdabafb4e137357b25be72a"));
|
||||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking1() {
|
public void testHasAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("94103b529e013748696be3e5dd0d90ee"));
|
Arrays.asList("ac80f5a5ab06037f938861effcceb66f"));
|
||||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsNotAsking1() {
|
public void testNoAnnotsNotAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("386ab9b98b708a840a1ae915624777d5"));
|
Arrays.asList("18d85a9711d56bdf7e2327b83d6745e2"));
|
||||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking1() {
|
public void testNoAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("cf2f60758269759f7be79ad18b12771b"));
|
Arrays.asList("cda6af61023e67ce8cc8d57cc6cd155b"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
public void testNWayVenn() {
|
public void testNWayVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
|
baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1,
|
||||||
Arrays.asList("e067aace9080bafdf3312632de60b4fc"));
|
Arrays.asList("9a5910137b6b9745f6e0c3ee711a6bfa"));
|
||||||
executeTest("testNWayVenn", spec);
|
executeTest("testNWayVenn", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,18 +1,22 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/** test the VCFReader class test */
|
/** test the VCFReader class test */
|
||||||
public class VCFReaderTest extends BaseTest {
|
public class VCFReaderTest extends BaseTest {
|
||||||
|
|
||||||
private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
|
private static final File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
|
||||||
private static File multiSampleVCF = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/MultiSample.vcf");
|
private static final File multiSampleVCF = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/MultiSample.vcf");
|
||||||
|
private static final String VCF_MIXUP_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/mixedup.vcf";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVCFInput() {
|
public void testVCFInput() {
|
||||||
|
|
@ -59,12 +63,12 @@ public class VCFReaderTest extends BaseTest {
|
||||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||||
VCFRecord rec = reader.next();
|
VCFRecord rec = reader.next();
|
||||||
boolean seenNA11992 = false;
|
boolean seenNA11992 = false;
|
||||||
boolean seenNA12287 = false;
|
boolean seenNA12287 = false;
|
||||||
for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
|
for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
|
||||||
if (record.getSampleName().equals("NA11992")) {
|
if (record.getSampleName().equals("NA11992")) {
|
||||||
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
||||||
if (!encodings.get(0).getBases().equals("A") ||
|
if (!encodings.get(0).getBases().equals("A") ||
|
||||||
!encodings.get(1).getBases().equals("A")) {
|
!encodings.get(1).getBases().equals("A")) {
|
||||||
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
||||||
}
|
}
|
||||||
seenNA11992 = true;
|
seenNA11992 = true;
|
||||||
|
|
@ -72,7 +76,7 @@ public class VCFReaderTest extends BaseTest {
|
||||||
if (record.getSampleName().equals("NA12287")) {
|
if (record.getSampleName().equals("NA12287")) {
|
||||||
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
||||||
if (!encodings.get(0).getBases().equals("A") ||
|
if (!encodings.get(0).getBases().equals("A") ||
|
||||||
!encodings.get(1).getBases().equals("T")) {
|
!encodings.get(1).getBases().equals("T")) {
|
||||||
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
||||||
}
|
}
|
||||||
seenNA12287 = true;
|
seenNA12287 = true;
|
||||||
|
|
@ -135,4 +139,49 @@ public class VCFReaderTest extends BaseTest {
|
||||||
Assert.assertTrue(values.get("C").equals(""));
|
Assert.assertTrue(values.get("C").equals(""));
|
||||||
Assert.assertTrue(values.get("D").equals(""));
|
Assert.assertTrue(values.get("D").equals(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKiransOutOfOrderExample() {
|
||||||
|
ArrayList<String> header = new ArrayList<String>();
|
||||||
|
ArrayList<String> variant = new ArrayList<String>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
FileReader reader = new FileReader(VCF_MIXUP_FILE);
|
||||||
|
BufferedReader breader = new BufferedReader(reader);
|
||||||
|
String line;
|
||||||
|
while ((line = breader.readLine()) != null) {
|
||||||
|
String[] pieces = line.split("\\s+");
|
||||||
|
|
||||||
|
if (line.contains("##")) {
|
||||||
|
continue;
|
||||||
|
} else if (line.contains("#CHROM")) {
|
||||||
|
for (int i = 0; i < pieces.length; i++) {
|
||||||
|
header.add(i, pieces[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < pieces.length; i++) {
|
||||||
|
variant.add(i, pieces[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
Assert.fail("Unable to open the mixed up VCF file.");
|
||||||
|
} catch (IOException e) {
|
||||||
|
Assert.fail("Unable to read from the mixed up VCF file.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// now load up a ROD
|
||||||
|
Iterator<RodVCF> rod = RodVCF.createIterator("name",new File(VCF_MIXUP_FILE));
|
||||||
|
RodVCF newRod = rod.next();
|
||||||
|
List<VCFGenotypeRecord> records = newRod.mCurrentRecord.getVCFGenotypeRecords();
|
||||||
|
for (VCFGenotypeRecord record: records) {
|
||||||
|
if (!header.contains(record.getSampleName())) {
|
||||||
|
Assert.fail("Parsed header doesn't contain field " + record.getSampleName());
|
||||||
|
}
|
||||||
|
if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()))) {
|
||||||
|
Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles())
|
||||||
|
+ " != " + variant.get(header.indexOf(record.getSampleName())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -142,10 +142,10 @@ public class VCFRecordTest extends BaseTest {
|
||||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||||
Map<String, String> metaData = new HashMap<String, String>();
|
Map<String, String> metaData = new HashMap<String, String>();
|
||||||
List<String> additionalColumns = new ArrayList<String>();
|
List<String> additionalColumns = new ArrayList<String>();
|
||||||
String rep = rec.toStringRepresentation(createFakeHeader());
|
String rep = rec.toStringEncoding(createFakeHeader());
|
||||||
Assert.assertTrue(stringRep.equals(rep));
|
Assert.assertTrue(stringRep.equals(rep));
|
||||||
rec.addInfoField("AB", "CD");
|
rec.addInfoField("AB", "CD");
|
||||||
String rep2 = rec.toStringRepresentation(createFakeHeader());
|
String rep2 = rec.toStringEncoding(createFakeHeader());
|
||||||
Assert.assertTrue(stringRep2.equals(rep2));
|
Assert.assertTrue(stringRep2.equals(rep2));
|
||||||
//rec.addGenotypeField(createGenotype("sample3","A","D12"));
|
//rec.addGenotypeField(createGenotype("sample3","A","D12"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue