Add code to allow reference alleles with 'N' in VariantContext, but not in the alternate allele(s). Also more updates to the VCF 4 code (fixed parsing for files without genotypes).
This check-in will temperarly break the build (I need to see if Bamboo is correctly returning the log file for the failed builds). Will be fixed once Bamboo starts building. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3609 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
824c2bbac0
commit
a6d3e4bd47
|
|
@ -106,7 +106,7 @@ public class Allele implements Comparable<Allele> {
|
|||
this.isRef = isRef;
|
||||
this.bases = bases;
|
||||
|
||||
if ( ! acceptableAlleleBases(bases) )
|
||||
if ( ! acceptableAlleleBases(bases,isRef) )
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
}
|
||||
|
||||
|
|
@ -186,24 +186,29 @@ public class Allele implements Comparable<Allele> {
|
|||
|
||||
/**
|
||||
* @param bases bases representing an allele
|
||||
* @param reference is this the reference allele
|
||||
* @return true if the bases represent the well formatted allele
|
||||
*/
|
||||
public static boolean acceptableAlleleBases(String bases) {
|
||||
return acceptableAlleleBases(bases.getBytes());
|
||||
public static boolean acceptableAlleleBases(String bases, boolean reference) {
|
||||
return acceptableAlleleBases(bases.getBytes(),reference);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param bases bases representing an allele
|
||||
* @param reference are we the reference (we allow n's in the reference allele)
|
||||
* @return true if the bases represent the well formatted allele
|
||||
*/
|
||||
public static boolean acceptableAlleleBases(byte[] bases) {
|
||||
public static boolean acceptableAlleleBases(byte[] bases, boolean reference) {
|
||||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
|
||||
return true;
|
||||
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
switch (bases[i]) {
|
||||
case 'A': case 'C': case 'G': case 'T': break;
|
||||
default: return false;
|
||||
case 'N': if(!reference)
|
||||
return false; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
// if ( ! BaseUtils.isRegularBase(bases[i]) ) {
|
||||
// return false;
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ public class VariantContextAdaptors {
|
|||
private static class DBSnpAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
|
||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp),true) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt,false) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -131,7 +131,7 @@ public class VariantContextAdaptors {
|
|||
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
|
||||
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
|
||||
// add the reference allele
|
||||
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(vcf.getReference(),true) ) {
|
||||
System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -146,7 +146,7 @@ public class VariantContextAdaptors {
|
|||
alleles.add(refAllele);
|
||||
|
||||
for ( VCFGenotypeEncoding alt : vcf.getAlternateAlleles() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt.getBases()) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt.getBases(),false) ) {
|
||||
//System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
|
|
@ -531,7 +531,7 @@ public class VariantContextAdaptors {
|
|||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
RodGLF glf = (RodGLF)input;
|
||||
|
||||
if ( ! Allele.acceptableAlleleBases(glf.getReference()) )
|
||||
if ( ! Allele.acceptableAlleleBases(glf.getReference(),true) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(glf.getReference(), true);
|
||||
|
||||
|
|
@ -543,7 +543,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : glf.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt,false) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = Allele.create(alt, false);
|
||||
|
|
@ -603,7 +603,7 @@ public class VariantContextAdaptors {
|
|||
*/
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
GeliTextFeature geli = (GeliTextFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) )
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase()),true) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(String.valueOf(geli.getRefBase()), true);
|
||||
|
||||
|
|
@ -614,7 +614,7 @@ public class VariantContextAdaptors {
|
|||
List<Allele> genotypeAlleles = new ArrayList<Allele>();
|
||||
// add all of the alt alleles
|
||||
for ( char alt : geli.getGenotype().toString().toCharArray() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt),false) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = Allele.create(String.valueOf(alt), false);
|
||||
|
|
@ -670,7 +670,7 @@ public class VariantContextAdaptors {
|
|||
*/
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) )
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase()),true) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(String.valueOf((char)geli.getReferenceBase()), true);
|
||||
|
||||
|
|
@ -679,14 +679,14 @@ public class VariantContextAdaptors {
|
|||
alleles.add(refAllele);
|
||||
|
||||
// add the two alt alleles
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) {
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()),false)) {
|
||||
return null;
|
||||
}
|
||||
Allele allele1 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
|
||||
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
|
||||
|
||||
// add the two alt alleles
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) {
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()),false)) {
|
||||
return null;
|
||||
}
|
||||
Allele allele2 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
|
||||
|
|
|
|||
|
|
@ -28,31 +28,29 @@ public class VCF4Codec implements FeatureCodec {
|
|||
// we have to store the list of strings that make up the header until they're needed
|
||||
private VCFHeader header = null;
|
||||
|
||||
// used to subtract from the
|
||||
// used to convert the index of the alternate allele in genotypes to a integer index
|
||||
private static int ZERO_CHAR = (byte)'0';
|
||||
|
||||
// a mapping of the allele
|
||||
private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
|
||||
|
||||
//private static String[] CachedGTKey = new String[100];
|
||||
// cache the genotyope values
|
||||
private static String[] CachedGTValues = new String[100];
|
||||
|
||||
// for performance testing purposes
|
||||
public static boolean parseGenotypesToo = true;
|
||||
|
||||
// for performance testing purposes
|
||||
public static boolean validate = true;
|
||||
|
||||
// a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over
|
||||
// todo: make this thread safe?
|
||||
private String[] parts = null;
|
||||
|
||||
// for performance we cache the hashmap of filter encodings for quick lookup
|
||||
private HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
|
||||
|
||||
// a set of the genotype keys? TODO: rename to something better
|
||||
private String[] GTKeys = new String[100];
|
||||
// a set of the genotype keys?
|
||||
private String[] genotypeKeyArray = new String[100];
|
||||
|
||||
// a list of the info fields, filter fields, and format fields
|
||||
// a list of the info fields, filter fields, and format fields, for quick lookup to validate against
|
||||
ArrayList<String> infoFields = new ArrayList<String>();
|
||||
ArrayList<String> formatFields = new ArrayList<String>();
|
||||
ArrayList<String> filterFields = new ArrayList<String>();
|
||||
|
|
@ -60,6 +58,9 @@ public class VCF4Codec implements FeatureCodec {
|
|||
// do we want to validate the info, format, and filter fields
|
||||
private final boolean validateFromHeader = true;
|
||||
|
||||
// we store a name to give to each of the variant contexts we emit
|
||||
private String name = "Unkown";
|
||||
|
||||
/**
|
||||
* this method is a big hack, since I haven't gotten to updating the VCF header for the 4.0 updates
|
||||
* @param reader the line reader to take header lines from
|
||||
|
|
@ -76,27 +77,7 @@ public class VCF4Codec implements FeatureCodec {
|
|||
headerStrings.add(line);
|
||||
}
|
||||
else if (line.startsWith("#")) {
|
||||
headerStrings.add(line);
|
||||
header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0);
|
||||
|
||||
// load the parsing fields
|
||||
Set<VCFHeaderLine> headerLines = header.getMetaData();
|
||||
|
||||
// setup our look-up lists for validation
|
||||
for (VCFHeaderLine hl : headerLines) {
|
||||
if (hl.getClass() == VCFFilterHeaderLine.class)
|
||||
this.filterFields.add(((VCFFilterHeaderLine)hl).getmName());
|
||||
if (hl.getClass() == VCFFormatHeaderLine.class)
|
||||
this.formatFields.add(((VCFFormatHeaderLine)hl).getmName());
|
||||
if (hl.getClass() == VCFInfoHeaderLine.class)
|
||||
this.infoFields.add(((VCFInfoHeaderLine)hl).getmName());
|
||||
}
|
||||
// sort the lists
|
||||
Collections.sort(filterFields);
|
||||
Collections.sort(formatFields);
|
||||
Collections.sort(infoFields);
|
||||
|
||||
return headerStrings.size();
|
||||
return createHeader(headerStrings, line);
|
||||
}
|
||||
else {
|
||||
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
|
||||
|
|
@ -110,6 +91,41 @@ public class VCF4Codec implements FeatureCodec {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF header
|
||||
* @param headerStrings a list of strings that represent all the ## entries
|
||||
* @param line the single # line (column names)
|
||||
* @return the count of header lines
|
||||
*/
|
||||
private int createHeader(List<String> headerStrings, String line) {
|
||||
headerStrings.add(line);
|
||||
header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0);
|
||||
|
||||
// load the parsing fields
|
||||
Set<VCFHeaderLine> headerLines = header.getMetaData();
|
||||
|
||||
// setup our look-up lists for validation
|
||||
for (VCFHeaderLine hl : headerLines) {
|
||||
if (hl.getClass() == VCFFilterHeaderLine.class)
|
||||
this.filterFields.add(((VCFFilterHeaderLine)hl).getmName());
|
||||
if (hl.getClass() == VCFFormatHeaderLine.class)
|
||||
this.formatFields.add(((VCFFormatHeaderLine)hl).getmName());
|
||||
if (hl.getClass() == VCFInfoHeaderLine.class)
|
||||
this.infoFields.add(((VCFInfoHeaderLine)hl).getmName());
|
||||
}
|
||||
// sort the lists so we can binary search them later on
|
||||
Collections.sort(filterFields);
|
||||
Collections.sort(formatFields);
|
||||
Collections.sort(infoFields);
|
||||
|
||||
return headerStrings.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* the fast decode function
|
||||
* @param line the line of text for the record
|
||||
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
||||
*/
|
||||
public Feature decodeLoc(String line) {
|
||||
return decode(line);
|
||||
}
|
||||
|
|
@ -205,6 +221,11 @@ public class VCF4Codec implements FeatureCodec {
|
|||
return attributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* validate the attributes against the stored fields of the appopriate type
|
||||
* @param attributes the list of fields to check for inclusion against the field array
|
||||
* @param fields the master list; all attributes must be in this list to validate
|
||||
*/
|
||||
private void validateFields(Set<String> attributes, List<String> fields) {
|
||||
// validate the info fields
|
||||
if (validateFromHeader) {
|
||||
|
|
@ -234,15 +255,15 @@ public class VCF4Codec implements FeatureCodec {
|
|||
private List<Allele> parseAlleles(String ref, String alts) {
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic
|
||||
// ref
|
||||
checkAllele(ref);
|
||||
checkAllele(ref, true);
|
||||
Allele refAllele = Allele.create(ref, true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split
|
||||
parseSingleAllele(alleles, alts);
|
||||
parseSingleAllele(alleles, alts, false);
|
||||
else
|
||||
for ( String alt : Utils.split(alts, ",") )
|
||||
parseSingleAllele(alleles, alt);
|
||||
parseSingleAllele(alleles, alt, false);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
|
@ -250,9 +271,10 @@ public class VCF4Codec implements FeatureCodec {
|
|||
/**
|
||||
* check to make sure the allele is an acceptable allele
|
||||
* @param allele the allele to check
|
||||
* @param isRef are we the reference allele?
|
||||
*/
|
||||
private static void checkAllele(String allele) {
|
||||
if ( ! Allele.acceptableAlleleBases(allele) ) {
|
||||
private static void checkAllele(String allele,boolean isRef) {
|
||||
if ( ! Allele.acceptableAlleleBases(allele,isRef) ) {
|
||||
throw new StingException("Unparsable vcf record with allele " + allele);
|
||||
}
|
||||
}
|
||||
|
|
@ -261,9 +283,10 @@ public class VCF4Codec implements FeatureCodec {
|
|||
* parse a single allele, given the allele list
|
||||
* @param alleles the alleles available
|
||||
* @param alt the allele to parse
|
||||
* @param isRef are we the reference allele?
|
||||
*/
|
||||
private void parseSingleAllele(List<Allele> alleles, String alt) {
|
||||
checkAllele(alt);
|
||||
private void parseSingleAllele(List<Allele> alleles, String alt, boolean isRef) {
|
||||
checkAllele(alt,isRef);
|
||||
|
||||
Allele allele = Allele.create(alt, false);
|
||||
if ( ! allele.isNoCall() )
|
||||
|
|
@ -304,10 +327,12 @@ public class VCF4Codec implements FeatureCodec {
|
|||
|
||||
/**
|
||||
* parse out the VCF line
|
||||
*
|
||||
* @param parts the parts split up
|
||||
* @return a variant context object
|
||||
*/
|
||||
private VariantContext parseVCFLine(String[] parts) {
|
||||
// parse out the required fields
|
||||
String contig = parts[0];
|
||||
long pos = Long.valueOf(parts[1]);
|
||||
String id = parts[2];
|
||||
|
|
@ -316,26 +341,31 @@ public class VCF4Codec implements FeatureCodec {
|
|||
Double qual = parseQual(parts[5]);
|
||||
String filter = parts[6];
|
||||
String info = parts[7];
|
||||
String GT = parts[8];
|
||||
int genotypesStart = 9;
|
||||
|
||||
List<Allele> alleles = parseAlleles(ref, alts);
|
||||
|
||||
List<Allele> alleles = parseAlleles(ref, alts);
|
||||
Set<String> filters = parseFilters(filter);
|
||||
Map<String, Object> attributes = parseInfo(info, id);
|
||||
|
||||
// parse genotypes
|
||||
int nGTKeys = ParsingUtils.split(GT, GTKeys, ':');
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
|
||||
Iterator<String> iter = header.getGenotypeSamples().iterator();
|
||||
// find out our current location, and clip the alleles down to their minimum length
|
||||
Pair<GenomeLoc, List<Allele>> locAndAlleles = (ref.length() > 1) ?
|
||||
clipAlleles(contig, pos, ref, alleles) :
|
||||
new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
|
||||
|
||||
Pair<GenomeLoc,List<Allele>> locAndAlleles = (ref.length() > 1) ?
|
||||
clipAlleles(contig,pos,ref,alleles) :
|
||||
new Pair<GenomeLoc,List<Allele>>(GenomeLocParser.createGenomeLoc(contig,pos),alleles);
|
||||
// a map to store our genotypes
|
||||
Map<String, Genotype> genotypes = null;
|
||||
|
||||
// do we have genotyping data
|
||||
if (parts.length > 8) {
|
||||
String GT = parts[8];
|
||||
int genotypesStart = 9;
|
||||
// parse genotypes
|
||||
int nGTKeys = ParsingUtils.split(GT, genotypeKeyArray, ':');
|
||||
genotypes = new HashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
|
||||
Iterator<String> iter = header.getGenotypeSamples().iterator();
|
||||
|
||||
if ( parseGenotypesToo ) {
|
||||
alleleMap.clear();
|
||||
for ( int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++ ) {
|
||||
for (int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++) {
|
||||
String sample = parts[genotypeOffset];
|
||||
String[] GTValues = CachedGTValues;
|
||||
ParsingUtils.split(sample, GTValues, ':');
|
||||
|
|
@ -345,30 +375,30 @@ public class VCF4Codec implements FeatureCodec {
|
|||
|
||||
// todo -- the parsing of attributes could be made lazy for performance
|
||||
Map<String, String> gtAttributes = null;
|
||||
if ( nGTKeys > 1 ) {
|
||||
if (nGTKeys > 1) {
|
||||
gtAttributes = new HashMap<String, String>(nGTKeys - 1);
|
||||
for ( int i = 1; i < nGTKeys; i++ ) {
|
||||
if ( GTKeys[i].equals("GQ") ) {
|
||||
for (int i = 1; i < nGTKeys; i++) {
|
||||
if (genotypeKeyArray[i].equals("GQ")) {
|
||||
GTQual = parseQual(GTValues[i]);
|
||||
} if ( GTKeys[i].equals("FL") ) { // deal with genotype filters here
|
||||
}
|
||||
if (genotypeKeyArray[i].equals("FL")) { // deal with genotype filters here
|
||||
genotypeFilters.addAll(parseFilters(GTValues[i]));
|
||||
} else {
|
||||
gtAttributes.put(GTKeys[i], GTValues[i]);
|
||||
gtAttributes.put(genotypeKeyArray[i], GTValues[i]);
|
||||
}
|
||||
}
|
||||
// validate the format fields
|
||||
validateFields(gtAttributes.keySet(),formatFields);
|
||||
validateFields(gtAttributes.keySet(), formatFields);
|
||||
}
|
||||
|
||||
boolean phased = GTKeys[0].charAt(1) == '|';
|
||||
boolean phased = genotypeKeyArray[0].charAt(1) == '|';
|
||||
|
||||
Genotype g = new Genotype(iter.next(), genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased);
|
||||
genotypes.put(g.getSampleName(), g);
|
||||
}
|
||||
}
|
||||
|
||||
// todo -- we need access to our track name to name the variant context
|
||||
return new VariantContext("foo", locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
|
||||
return new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -434,4 +464,20 @@ public class VCF4Codec implements FeatureCodec {
|
|||
if (clazz != VCFHeader.class) throw new ClassCastException("expecting class " + clazz + " but VCF4Codec provides " + VCFHeader.class);
|
||||
return this.header;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name of this codec
|
||||
* @return our set name
|
||||
*/
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the name of this codec
|
||||
* @param name
|
||||
*/
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class VCF4ReaderTestWalker extends RodWalker<VCFRecord,Long> {
|
|||
String[] parts = new String[10000];
|
||||
public void onTraversalDone(Long num){
|
||||
VCF4Codec vcf4codec = new VCF4Codec();
|
||||
VCF4Codec.parseGenotypesToo = splitFile == ParsingStatus.GENOTYPES;
|
||||
//VCF4Codec.parseGenotypesToo = splitFile == ParsingStatus.GENOTYPES;
|
||||
VCF4Codec.validate = ! DontValidate;
|
||||
|
||||
VCFCodec vcf3codec = new VCFCodec();
|
||||
|
|
|
|||
|
|
@ -26,7 +26,8 @@ import java.util.Set;
|
|||
* test out pieces of the VCF 4 codec.
|
||||
*/
|
||||
public class VCF4UnitTest extends BaseTest {
|
||||
File vcfFile = new File("testdata/vcfExmaple.vcf4");
|
||||
File vcfGenotypeFile = new File("testdata/vcf/vcfWithGenotypes.vcf");
|
||||
File vcfNoGenotypeFile = new File("testdata/vcf/vcfWithoutGenotypes.vcf");
|
||||
|
||||
// setup the contig ordering
|
||||
@BeforeClass
|
||||
|
|
@ -42,7 +43,7 @@ public class VCF4UnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testReadBasicHeader() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
|
||||
int seenRecords = 0;
|
||||
|
||||
|
|
@ -95,7 +96,7 @@ public class VCF4UnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testOutputHeader() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
|
||||
File tempFile = null;
|
||||
try {
|
||||
|
|
@ -115,9 +116,8 @@ public class VCF4UnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCountVCF4Records() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
AsciiLineReader reader = testSetup.getReader();
|
||||
VCF4Codec codec = testSetup.getCodec();
|
||||
|
||||
// now parse the lines
|
||||
String line = null;
|
||||
|
|
@ -139,6 +139,33 @@ public class VCF4UnitTest extends BaseTest {
|
|||
Assert.fail("Failed to read a line");
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(7,recordCount);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountVCF4RecordsWithoutGenotypes() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfNoGenotypeFile);
|
||||
AsciiLineReader reader = testSetup.getReader();
|
||||
|
||||
// now parse the lines
|
||||
String line = null;
|
||||
try {
|
||||
line = reader.readLine();
|
||||
} catch (IOException e) {
|
||||
Assert.fail("Failed to read a line");
|
||||
}
|
||||
|
||||
// our record count
|
||||
int recordCount = 0;
|
||||
while (line != null) {
|
||||
try {
|
||||
recordCount++;
|
||||
testSetup.codec.decode(line);
|
||||
line = reader.readLine();
|
||||
} catch (IOException e) {
|
||||
Assert.fail("Failed to read a line");
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(6,recordCount);
|
||||
}
|
||||
|
||||
|
|
@ -150,25 +177,25 @@ public class VCF4UnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testCheckInfoValidation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
testSetup.codec.decode(regularLine);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckTwoFewInfoValidation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
testSetup.codec.decode(twoFewInfoLine);
|
||||
}
|
||||
|
||||
@Test(expected=StingException.class)
|
||||
public void testCheckTwoManyInfoValidation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfFile);
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
testSetup.codec.decode(twoManyInfoLine);
|
||||
}
|
||||
|
||||
//File largeVCF = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesTable1/dindel-v2/CEU.low_coverage.2010_06.indel.genotypes.vcf");
|
||||
/* File largeVCF = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesTable1/dindel-v2/CEU.low_coverage.2010_06.indel.genotypes.vcf");
|
||||
|
||||
/*@Test
|
||||
@Test
|
||||
public void checkLargeVCF() {
|
||||
TestSetup testSetup = new TestSetup().invoke(largeVCF);
|
||||
AsciiLineReader reader = testSetup.getReader();
|
||||
|
|
|
|||
Loading…
Reference in New Issue