Add code to allow reference alleles with 'N' in VariantContext, but not in the alternate allele(s). Also more updates to the VCF 4 code (fixed parsing for files without genotypes).

This check-in will temperarly break the build (I need to see if Bamboo is correctly returning the log file for the failed builds).  

Will be fixed once Bamboo starts building.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3609 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-06-22 18:26:37 +00:00
parent 824c2bbac0
commit a6d3e4bd47
5 changed files with 162 additions and 84 deletions

View File

@ -106,7 +106,7 @@ public class Allele implements Comparable<Allele> {
this.isRef = isRef; this.isRef = isRef;
this.bases = bases; this.bases = bases;
if ( ! acceptableAlleleBases(bases) ) if ( ! acceptableAlleleBases(bases,isRef) )
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
} }
@ -186,24 +186,29 @@ public class Allele implements Comparable<Allele> {
/** /**
* @param bases bases representing an allele * @param bases bases representing an allele
* @param reference is this the reference allele
* @return true if the bases represent the well formatted allele * @return true if the bases represent the well formatted allele
*/ */
public static boolean acceptableAlleleBases(String bases) { public static boolean acceptableAlleleBases(String bases, boolean reference) {
return acceptableAlleleBases(bases.getBytes()); return acceptableAlleleBases(bases.getBytes(),reference);
} }
/** /**
* @param bases bases representing an allele * @param bases bases representing an allele
* @param reference are we the reference (we allow n's in the reference allele)
* @return true if the bases represent the well formatted allele * @return true if the bases represent the well formatted allele
*/ */
public static boolean acceptableAlleleBases(byte[] bases) { public static boolean acceptableAlleleBases(byte[] bases, boolean reference) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) ) if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
return true; return true;
for ( int i = 0; i < bases.length; i++ ) { for ( int i = 0; i < bases.length; i++ ) {
switch (bases[i]) { switch (bases[i]) {
case 'A': case 'C': case 'G': case 'T': break; case 'A': case 'C': case 'G': case 'T': break;
default: return false; case 'N': if(!reference)
return false; break;
default:
return false;
} }
// if ( ! BaseUtils.isRegularBase(bases[i]) ) { // if ( ! BaseUtils.isRegularBase(bases[i]) ) {
// return false; // return false;

View File

@ -94,7 +94,7 @@ public class VariantContextAdaptors {
private static class DBSnpAdaptor extends VCAdaptor { private static class DBSnpAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input, ReferenceContext ref) { VariantContext convert(String name, Object input, ReferenceContext ref) {
DbSNPFeature dbsnp = (DbSNPFeature)input; DbSNPFeature dbsnp = (DbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) ) if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp),true) )
return null; return null;
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true); Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
@ -105,7 +105,7 @@ public class VariantContextAdaptors {
// add all of the alt alleles // add all of the alt alleles
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) { if ( ! Allele.acceptableAlleleBases(alt,false) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp); //System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null; return null;
} }
@ -131,7 +131,7 @@ public class VariantContextAdaptors {
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) { private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) { if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
// add the reference allele // add the reference allele
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) { if ( ! Allele.acceptableAlleleBases(vcf.getReference(),true) ) {
System.out.printf("Excluding vcf record %s%n", vcf); System.out.printf("Excluding vcf record %s%n", vcf);
return null; return null;
} }
@ -146,7 +146,7 @@ public class VariantContextAdaptors {
alleles.add(refAllele); alleles.add(refAllele);
for ( VCFGenotypeEncoding alt : vcf.getAlternateAlleles() ) { for ( VCFGenotypeEncoding alt : vcf.getAlternateAlleles() ) {
if ( ! Allele.acceptableAlleleBases(alt.getBases()) ) { if ( ! Allele.acceptableAlleleBases(alt.getBases(),false) ) {
//System.out.printf("Excluding vcf record %s%n", vcf); //System.out.printf("Excluding vcf record %s%n", vcf);
return null; return null;
} }
@ -531,7 +531,7 @@ public class VariantContextAdaptors {
VariantContext convert(String name, Object input, ReferenceContext ref) { VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGLF glf = (RodGLF)input; RodGLF glf = (RodGLF)input;
if ( ! Allele.acceptableAlleleBases(glf.getReference()) ) if ( ! Allele.acceptableAlleleBases(glf.getReference(),true) )
return null; return null;
Allele refAllele = Allele.create(glf.getReference(), true); Allele refAllele = Allele.create(glf.getReference(), true);
@ -543,7 +543,7 @@ public class VariantContextAdaptors {
// add all of the alt alleles // add all of the alt alleles
for ( String alt : glf.getAlternateAlleleList() ) { for ( String alt : glf.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) { if ( ! Allele.acceptableAlleleBases(alt,false) ) {
return null; return null;
} }
Allele allele = Allele.create(alt, false); Allele allele = Allele.create(alt, false);
@ -603,7 +603,7 @@ public class VariantContextAdaptors {
*/ */
VariantContext convert(String name, Object input, ReferenceContext ref) { VariantContext convert(String name, Object input, ReferenceContext ref) {
GeliTextFeature geli = (GeliTextFeature)input; GeliTextFeature geli = (GeliTextFeature)input;
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) ) if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase()),true) )
return null; return null;
Allele refAllele = Allele.create(String.valueOf(geli.getRefBase()), true); Allele refAllele = Allele.create(String.valueOf(geli.getRefBase()), true);
@ -614,7 +614,7 @@ public class VariantContextAdaptors {
List<Allele> genotypeAlleles = new ArrayList<Allele>(); List<Allele> genotypeAlleles = new ArrayList<Allele>();
// add all of the alt alleles // add all of the alt alleles
for ( char alt : geli.getGenotype().toString().toCharArray() ) { for ( char alt : geli.getGenotype().toString().toCharArray() ) {
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) { if ( ! Allele.acceptableAlleleBases(String.valueOf(alt),false) ) {
return null; return null;
} }
Allele allele = Allele.create(String.valueOf(alt), false); Allele allele = Allele.create(String.valueOf(alt), false);
@ -670,7 +670,7 @@ public class VariantContextAdaptors {
*/ */
VariantContext convert(String name, Object input, ReferenceContext ref) { VariantContext convert(String name, Object input, ReferenceContext ref) {
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord(); GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) ) if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase()),true) )
return null; return null;
Allele refAllele = Allele.create(String.valueOf((char)geli.getReferenceBase()), true); Allele refAllele = Allele.create(String.valueOf((char)geli.getReferenceBase()), true);
@ -679,14 +679,14 @@ public class VariantContextAdaptors {
alleles.add(refAllele); alleles.add(refAllele);
// add the two alt alleles // add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) { if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()),false)) {
return null; return null;
} }
Allele allele1 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele1()), false); Allele allele1 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1); if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
// add the two alt alleles // add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) { if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()),false)) {
return null; return null;
} }
Allele allele2 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele2()), false); Allele allele2 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);

View File

@ -28,31 +28,29 @@ public class VCF4Codec implements FeatureCodec {
// we have to store the list of strings that make up the header until they're needed // we have to store the list of strings that make up the header until they're needed
private VCFHeader header = null; private VCFHeader header = null;
// used to subtract from the // used to convert the index of the alternate allele in genotypes to a integer index
private static int ZERO_CHAR = (byte)'0'; private static int ZERO_CHAR = (byte)'0';
// a mapping of the allele // a mapping of the allele
private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3); private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
//private static String[] CachedGTKey = new String[100]; // cache the genotyope values
private static String[] CachedGTValues = new String[100]; private static String[] CachedGTValues = new String[100];
// for performance testing purposes
public static boolean parseGenotypesToo = true;
// for performance testing purposes // for performance testing purposes
public static boolean validate = true; public static boolean validate = true;
// a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over // a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over
// todo: make this thread safe?
private String[] parts = null; private String[] parts = null;
// for performance we cache the hashmap of filter encodings for quick lookup // for performance we cache the hashmap of filter encodings for quick lookup
private HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>(); private HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
// a set of the genotype keys? TODO: rename to something better // a set of the genotype keys?
private String[] GTKeys = new String[100]; private String[] genotypeKeyArray = new String[100];
// a list of the info fields, filter fields, and format fields // a list of the info fields, filter fields, and format fields, for quick lookup to validate against
ArrayList<String> infoFields = new ArrayList<String>(); ArrayList<String> infoFields = new ArrayList<String>();
ArrayList<String> formatFields = new ArrayList<String>(); ArrayList<String> formatFields = new ArrayList<String>();
ArrayList<String> filterFields = new ArrayList<String>(); ArrayList<String> filterFields = new ArrayList<String>();
@ -60,6 +58,9 @@ public class VCF4Codec implements FeatureCodec {
// do we want to validate the info, format, and filter fields // do we want to validate the info, format, and filter fields
private final boolean validateFromHeader = true; private final boolean validateFromHeader = true;
// we store a name to give to each of the variant contexts we emit
private String name = "Unkown";
/** /**
* this method is a big hack, since I haven't gotten to updating the VCF header for the 4.0 updates * this method is a big hack, since I haven't gotten to updating the VCF header for the 4.0 updates
* @param reader the line reader to take header lines from * @param reader the line reader to take header lines from
@ -76,6 +77,27 @@ public class VCF4Codec implements FeatureCodec {
headerStrings.add(line); headerStrings.add(line);
} }
else if (line.startsWith("#")) { else if (line.startsWith("#")) {
return createHeader(headerStrings, line);
}
else {
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
}
} catch (IOException e) {
throw new RuntimeException("IO Exception ", e);
}
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
/**
* create a VCF header
* @param headerStrings a list of strings that represent all the ## entries
* @param line the single # line (column names)
* @return the count of header lines
*/
private int createHeader(List<String> headerStrings, String line) {
headerStrings.add(line); headerStrings.add(line);
header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0); header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0);
@ -91,25 +113,19 @@ public class VCF4Codec implements FeatureCodec {
if (hl.getClass() == VCFInfoHeaderLine.class) if (hl.getClass() == VCFInfoHeaderLine.class)
this.infoFields.add(((VCFInfoHeaderLine)hl).getmName()); this.infoFields.add(((VCFInfoHeaderLine)hl).getmName());
} }
// sort the lists // sort the lists so we can binary search them later on
Collections.sort(filterFields); Collections.sort(filterFields);
Collections.sort(formatFields); Collections.sort(formatFields);
Collections.sort(infoFields); Collections.sort(infoFields);
return headerStrings.size(); return headerStrings.size();
} }
else {
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
}
} catch (IOException e) {
throw new RuntimeException("IO Exception ", e);
}
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
/**
* the fast decode function
* @param line the line of text for the record
* @return a feature, (not guaranteed complete) that has the correct start and stop
*/
public Feature decodeLoc(String line) { public Feature decodeLoc(String line) {
return decode(line); return decode(line);
} }
@ -205,6 +221,11 @@ public class VCF4Codec implements FeatureCodec {
return attributes; return attributes;
} }
/**
* validate the attributes against the stored fields of the appopriate type
* @param attributes the list of fields to check for inclusion against the field array
* @param fields the master list; all attributes must be in this list to validate
*/
private void validateFields(Set<String> attributes, List<String> fields) { private void validateFields(Set<String> attributes, List<String> fields) {
// validate the info fields // validate the info fields
if (validateFromHeader) { if (validateFromHeader) {
@ -234,15 +255,15 @@ public class VCF4Codec implements FeatureCodec {
private List<Allele> parseAlleles(String ref, String alts) { private List<Allele> parseAlleles(String ref, String alts) {
List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic
// ref // ref
checkAllele(ref); checkAllele(ref, true);
Allele refAllele = Allele.create(ref, true); Allele refAllele = Allele.create(ref, true);
alleles.add(refAllele); alleles.add(refAllele);
if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split
parseSingleAllele(alleles, alts); parseSingleAllele(alleles, alts, false);
else else
for ( String alt : Utils.split(alts, ",") ) for ( String alt : Utils.split(alts, ",") )
parseSingleAllele(alleles, alt); parseSingleAllele(alleles, alt, false);
return alleles; return alleles;
} }
@ -250,9 +271,10 @@ public class VCF4Codec implements FeatureCodec {
/** /**
* check to make sure the allele is an acceptable allele * check to make sure the allele is an acceptable allele
* @param allele the allele to check * @param allele the allele to check
* @param isRef are we the reference allele?
*/ */
private static void checkAllele(String allele) { private static void checkAllele(String allele,boolean isRef) {
if ( ! Allele.acceptableAlleleBases(allele) ) { if ( ! Allele.acceptableAlleleBases(allele,isRef) ) {
throw new StingException("Unparsable vcf record with allele " + allele); throw new StingException("Unparsable vcf record with allele " + allele);
} }
} }
@ -261,9 +283,10 @@ public class VCF4Codec implements FeatureCodec {
* parse a single allele, given the allele list * parse a single allele, given the allele list
* @param alleles the alleles available * @param alleles the alleles available
* @param alt the allele to parse * @param alt the allele to parse
* @param isRef are we the reference allele?
*/ */
private void parseSingleAllele(List<Allele> alleles, String alt) { private void parseSingleAllele(List<Allele> alleles, String alt, boolean isRef) {
checkAllele(alt); checkAllele(alt,isRef);
Allele allele = Allele.create(alt, false); Allele allele = Allele.create(alt, false);
if ( ! allele.isNoCall() ) if ( ! allele.isNoCall() )
@ -304,10 +327,12 @@ public class VCF4Codec implements FeatureCodec {
/** /**
* parse out the VCF line * parse out the VCF line
*
* @param parts the parts split up * @param parts the parts split up
* @return a variant context object * @return a variant context object
*/ */
private VariantContext parseVCFLine(String[] parts) { private VariantContext parseVCFLine(String[] parts) {
// parse out the required fields
String contig = parts[0]; String contig = parts[0];
long pos = Long.valueOf(parts[1]); long pos = Long.valueOf(parts[1]);
String id = parts[2]; String id = parts[2];
@ -316,24 +341,29 @@ public class VCF4Codec implements FeatureCodec {
Double qual = parseQual(parts[5]); Double qual = parseQual(parts[5]);
String filter = parts[6]; String filter = parts[6];
String info = parts[7]; String info = parts[7];
String GT = parts[8];
int genotypesStart = 9;
List<Allele> alleles = parseAlleles(ref, alts); List<Allele> alleles = parseAlleles(ref, alts);
Set<String> filters = parseFilters(filter); Set<String> filters = parseFilters(filter);
Map<String, Object> attributes = parseInfo(info, id); Map<String, Object> attributes = parseInfo(info, id);
// parse genotypes // find out our current location, and clip the alleles down to their minimum length
int nGTKeys = ParsingUtils.split(GT, GTKeys, ':');
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
Iterator<String> iter = header.getGenotypeSamples().iterator();
Pair<GenomeLoc, List<Allele>> locAndAlleles = (ref.length() > 1) ? Pair<GenomeLoc, List<Allele>> locAndAlleles = (ref.length() > 1) ?
clipAlleles(contig, pos, ref, alleles) : clipAlleles(contig, pos, ref, alleles) :
new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles); new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
// a map to store our genotypes
Map<String, Genotype> genotypes = null;
// do we have genotyping data
if (parts.length > 8) {
String GT = parts[8];
int genotypesStart = 9;
// parse genotypes
int nGTKeys = ParsingUtils.split(GT, genotypeKeyArray, ':');
genotypes = new HashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
Iterator<String> iter = header.getGenotypeSamples().iterator();
if ( parseGenotypesToo ) {
alleleMap.clear(); alleleMap.clear();
for (int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++) { for (int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++) {
String sample = parts[genotypeOffset]; String sample = parts[genotypeOffset];
@ -348,27 +378,27 @@ public class VCF4Codec implements FeatureCodec {
if (nGTKeys > 1) { if (nGTKeys > 1) {
gtAttributes = new HashMap<String, String>(nGTKeys - 1); gtAttributes = new HashMap<String, String>(nGTKeys - 1);
for (int i = 1; i < nGTKeys; i++) { for (int i = 1; i < nGTKeys; i++) {
if ( GTKeys[i].equals("GQ") ) { if (genotypeKeyArray[i].equals("GQ")) {
GTQual = parseQual(GTValues[i]); GTQual = parseQual(GTValues[i]);
} if ( GTKeys[i].equals("FL") ) { // deal with genotype filters here }
if (genotypeKeyArray[i].equals("FL")) { // deal with genotype filters here
genotypeFilters.addAll(parseFilters(GTValues[i])); genotypeFilters.addAll(parseFilters(GTValues[i]));
} else { } else {
gtAttributes.put(GTKeys[i], GTValues[i]); gtAttributes.put(genotypeKeyArray[i], GTValues[i]);
} }
} }
// validate the format fields // validate the format fields
validateFields(gtAttributes.keySet(), formatFields); validateFields(gtAttributes.keySet(), formatFields);
} }
boolean phased = GTKeys[0].charAt(1) == '|'; boolean phased = genotypeKeyArray[0].charAt(1) == '|';
Genotype g = new Genotype(iter.next(), genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased); Genotype g = new Genotype(iter.next(), genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased);
genotypes.put(g.getSampleName(), g); genotypes.put(g.getSampleName(), g);
} }
} }
// todo -- we need access to our track name to name the variant context // todo -- we need access to our track name to name the variant context
return new VariantContext("foo", locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes); return new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
} }
/** /**
@ -434,4 +464,20 @@ public class VCF4Codec implements FeatureCodec {
if (clazz != VCFHeader.class) throw new ClassCastException("expecting class " + clazz + " but VCF4Codec provides " + VCFHeader.class); if (clazz != VCFHeader.class) throw new ClassCastException("expecting class " + clazz + " but VCF4Codec provides " + VCFHeader.class);
return this.header; return this.header;
} }
/**
* get the name of this codec
* @return our set name
*/
public String getName() {
return name;
}
/**
* set the name of this codec
* @param name
*/
public void setName(String name) {
this.name = name;
}
} }

View File

@ -92,7 +92,7 @@ public class VCF4ReaderTestWalker extends RodWalker<VCFRecord,Long> {
String[] parts = new String[10000]; String[] parts = new String[10000];
public void onTraversalDone(Long num){ public void onTraversalDone(Long num){
VCF4Codec vcf4codec = new VCF4Codec(); VCF4Codec vcf4codec = new VCF4Codec();
VCF4Codec.parseGenotypesToo = splitFile == ParsingStatus.GENOTYPES; //VCF4Codec.parseGenotypesToo = splitFile == ParsingStatus.GENOTYPES;
VCF4Codec.validate = ! DontValidate; VCF4Codec.validate = ! DontValidate;
VCFCodec vcf3codec = new VCFCodec(); VCFCodec vcf3codec = new VCFCodec();

View File

@ -26,7 +26,8 @@ import java.util.Set;
* test out pieces of the VCF 4 codec. * test out pieces of the VCF 4 codec.
*/ */
public class VCF4UnitTest extends BaseTest { public class VCF4UnitTest extends BaseTest {
File vcfFile = new File("testdata/vcfExmaple.vcf4"); File vcfGenotypeFile = new File("testdata/vcf/vcfWithGenotypes.vcf");
File vcfNoGenotypeFile = new File("testdata/vcf/vcfWithoutGenotypes.vcf");
// setup the contig ordering // setup the contig ordering
@BeforeClass @BeforeClass
@ -42,7 +43,7 @@ public class VCF4UnitTest extends BaseTest {
@Test @Test
public void testReadBasicHeader() { public void testReadBasicHeader() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
int seenRecords = 0; int seenRecords = 0;
@ -95,7 +96,7 @@ public class VCF4UnitTest extends BaseTest {
@Test @Test
public void testOutputHeader() { public void testOutputHeader() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
File tempFile = null; File tempFile = null;
try { try {
@ -115,9 +116,8 @@ public class VCF4UnitTest extends BaseTest {
@Test @Test
public void testCountVCF4Records() { public void testCountVCF4Records() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
AsciiLineReader reader = testSetup.getReader(); AsciiLineReader reader = testSetup.getReader();
VCF4Codec codec = testSetup.getCodec();
// now parse the lines // now parse the lines
String line = null; String line = null;
@ -139,6 +139,33 @@ public class VCF4UnitTest extends BaseTest {
Assert.fail("Failed to read a line"); Assert.fail("Failed to read a line");
} }
} }
Assert.assertEquals(7,recordCount);
}
@Test
public void testCountVCF4RecordsWithoutGenotypes() {
TestSetup testSetup = new TestSetup().invoke(vcfNoGenotypeFile);
AsciiLineReader reader = testSetup.getReader();
// now parse the lines
String line = null;
try {
line = reader.readLine();
} catch (IOException e) {
Assert.fail("Failed to read a line");
}
// our record count
int recordCount = 0;
while (line != null) {
try {
recordCount++;
testSetup.codec.decode(line);
line = reader.readLine();
} catch (IOException e) {
Assert.fail("Failed to read a line");
}
}
Assert.assertEquals(6,recordCount); Assert.assertEquals(6,recordCount);
} }
@ -150,25 +177,25 @@ public class VCF4UnitTest extends BaseTest {
@Test @Test
public void testCheckInfoValidation() { public void testCheckInfoValidation() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
testSetup.codec.decode(regularLine); testSetup.codec.decode(regularLine);
} }
@Test @Test
public void testCheckTwoFewInfoValidation() { public void testCheckTwoFewInfoValidation() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
testSetup.codec.decode(twoFewInfoLine); testSetup.codec.decode(twoFewInfoLine);
} }
@Test(expected=StingException.class) @Test(expected=StingException.class)
public void testCheckTwoManyInfoValidation() { public void testCheckTwoManyInfoValidation() {
TestSetup testSetup = new TestSetup().invoke(vcfFile); TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
testSetup.codec.decode(twoManyInfoLine); testSetup.codec.decode(twoManyInfoLine);
} }
//File largeVCF = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesTable1/dindel-v2/CEU.low_coverage.2010_06.indel.genotypes.vcf"); /* File largeVCF = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesTable1/dindel-v2/CEU.low_coverage.2010_06.indel.genotypes.vcf");
/*@Test @Test
public void checkLargeVCF() { public void checkLargeVCF() {
TestSetup testSetup = new TestSetup().invoke(largeVCF); TestSetup testSetup = new TestSetup().invoke(largeVCF);
AsciiLineReader reader = testSetup.getReader(); AsciiLineReader reader = testSetup.getReader();