diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java index 506087adf..25c4c177c 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java @@ -1,671 +1,678 @@ -//package org.broadinstitute.sting.gatk.refdata; -// -//import org.broadinstitute.sting.utils.BaseUtils; -//import org.broadinstitute.sting.utils.GenomeLoc; -//import org.broadinstitute.sting.utils.GenomeLocParser; -//import org.broadinstitute.sting.utils.StingException; -//import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -//import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -//import org.broadinstitute.sting.gatk.CommandLineGATK; -// -//import java.io.*; -//import java.util.*; -// -//import net.sf.samtools.SAMFileHeader; -// -///** -// * Created by IntelliJ IDEA. -// * User: chartl -// * Date: Jan 19, 2010 -// * Time: 10:24:18 AM -// * To change this template use File | Settings | File Templates. -// */ -//public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum { -// private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex", -// "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE")); -// private final byte SNP_MAJOR_MODE = 1; -// -// private ArrayList variants; -// private PlinkVariantInfo currentVariant; -// private ListIterator variantIterator; -// -// private PlinkFileType plinkFileType; -// -// public enum PlinkFileType { -// STANDARD_PED,RAW_PED,BINARY_PED -// } -// -// // // // CONSTRUCTOR // // // -// -// public PlinkRod(String name) { -// super(name); -// } -// -// @Override -// public Object initialize(final File plinkFile) throws FileNotFoundException { -// if ( ! plinkFile.exists() ) { -// throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist."); -// } -// -// variants = parsePlinkFile(plinkFile); -// if ( variants != null ) { -// variantIterator = variants.listIterator(); -// currentVariant = variantIterator.next(); -// } -// -// assertNotNull(); -// -// return null; -// } -// -// private void assertNotNull() { -// if ( currentVariant == null ) { -// throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" ); -// } -// } -// -// @Override -// public boolean parseLine(Object obj, String[] args) { -// if ( variantIterator.hasNext() ) { -// currentVariant = variantIterator.next(); -// return true; -// } else { -// return false; -// } -// } -// -// @Override -// public GenomeLoc getLocation() { -// return currentVariant.getLocation(); -// } -// -// @Override -// public String toString() { -// return currentVariant == null ? "" : currentVariant.toString(); -// } -// -// public String getVariantName() { -// return currentVariant.getName(); -// } -// -// public ArrayList getVariantSampleNames() { -// return currentVariant.getSampleNames(); -// } -// -// public ArrayList getGenotypes() { -// return currentVariant.getGenotypes(); -// } -// -// public boolean variantIsSNP() { -// return currentVariant.isSNP(); -// } -// -// -// // AM I PARSING A TEXT OR A BINARY FILE ?? -// -// private ArrayList parsePlinkFile(File file) { -// String[] splitFileName = file.getName().split("\\."); -// String extension = splitFileName[splitFileName.length-1]; -// if ( extension.equals("ped") || extension.equals("raw") ) { -// return parseTextFormattedPlinkFile(file); -// } else if ( extension.equals("bed") || extension.equals("bim") || extension.equals("fam") ) { -// plinkFileType = PlinkFileType.BINARY_PED; -// return parseBinaryFormattedPlinkFile(file); -// } else { -// System.out.println("Warning -- Plink file does not have a standard extension (ped/raw for text, bed/bim/fam for binary) -- assuming ped format"); -// return parseTextFormattedPlinkFile(file); -// } -// -// } -// -// /* *** *** *** *** *** ** *** ** *** ** *** ** *** ** *** -// * * PARSING STANDARD TEXT PED FILES * ** -// * *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/ -// -// private ArrayList parseTextFormattedPlinkFile( File file ) { -// try { -// BufferedReader reader = new BufferedReader( new FileReader ( file ) ); -// String header = reader.readLine(); -// ArrayList seqVars = instantiateVariantListFromHeader(header); -// ArrayList snpOffsets = getSNPOffsetsFromHeader(header); -// -// String line = null; -// do { -// line = reader.readLine(); -// incorporateInfo(seqVars,snpOffsets,line); -// } while ( line != null ); -// -// -// java.util.Collections.sort(seqVars); // because the comparable uses the GenomeLoc comparable; these -// // are sorted in standard reference order -// -// return seqVars; -// -// } catch ( FileNotFoundException e ) { -// throw new StingException("File "+file.getAbsolutePath()+" could not be found. This was checked earlier. Should never happen.",e); -// } catch ( IOException e ) { -// throw new StingException("Error reading file "+file.getAbsolutePath()+".",e); -// } -// } -// -// private void incorporateInfo(List vars, List offsets, String plinkLine) { -// if ( plinkLine == null ) { -// return; -// } -// -// String[] plinkInfo; -// if ( plinkFileType == PlinkFileType.STANDARD_PED) { -// plinkInfo = plinkLine.split("\t"); -// } else { -// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); -// } -// -// String individualName = plinkInfo[1]; -// -// int snpNumber = 0; -// -// if ( plinkFileType == PlinkFileType.STANDARD_PED ) { -// for ( int i : offsets ) { -// vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName); -// snpNumber++; -// } -// } -// } -// -// private ArrayList instantiateVariantListFromHeader(String header) { -// if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing -// plinkFileType = PlinkFileType.STANDARD_PED; -// } else {// first line is the raw header; comes from de-binary-ing a .bed file -// plinkFileType = PlinkFileType.RAW_PED; -// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); -// } -// -// ArrayList seqVars = new ArrayList(); -// String[] headerFields; -// -// if ( plinkFileType == PlinkFileType.STANDARD_PED ) { -// headerFields = header.split("\t"); -// } else { -// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); -// } -// -// for ( String field : headerFields ) { -// if ( ! headerEntries.contains(field) ) { -// // not a standard header, so a variant -// seqVars.add(new PlinkVariantInfo(field)); -// } -// } -// -// return seqVars; -// } -// -// private ArrayList getSNPOffsetsFromHeader(String header) { -// ArrayList offsets = new ArrayList(); -// String[] headerFields; -// if ( plinkFileType == PlinkFileType.STANDARD_PED ) { -// headerFields = header.split("\t+"); -// } else { -// headerFields = header.split("\\s+"); -// } -// -// int offset = 0; -// -// for ( String field : headerFields ) { -// if ( ! headerEntries.contains(field) ) { -// offsets.add(offset); -// } -// offset++; -// } -// -// return offsets; -// } -// -// /* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** -// * * PARSING BINARY PLINK BED/BIM/FAM FILES * * -// * *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/ -// -// private ArrayList parseBinaryFormattedPlinkFile(File file) { -// PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file); -// ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile); -// ArrayList sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile); -// ArrayList updatedVariants = getGenotypesFromBedFile(parsedVariants,sampleNames,binaryFiles.bedFile); -// -// java.util.Collections.sort(updatedVariants); -// -// return updatedVariants; -// } -// -// private PlinkBinaryTrifecta getPlinkBinaryTriplet(File file) { -// // just gonna parse the name -// PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta(); -// String absolute_path = file.getAbsolutePath(); -// String[] directory_tree = absolute_path.split("/"); -// String file_name = directory_tree[directory_tree.length-1].split("\\.")[0]; -// StringBuilder pathBuilder = new StringBuilder(); -// for ( int i = 0; i < directory_tree.length - 1; i ++ ) { -// pathBuilder.append(String.format("%s/",directory_tree[i])); -// } -// String path = pathBuilder.toString(); -// trifecta.bedFile = new File(path+file_name+".bed"); -// trifecta.bimFile = new File(path+file_name+".bim"); -// trifecta.famFile = new File(path+file_name+".fam"); -// -// return trifecta; -// -// } -// -// private ArrayList instantiateVariantsFromBimFile(File bimFile) { -// BufferedReader reader; -// try { -// reader = new BufferedReader( new FileReader( bimFile )); -// } catch ( FileNotFoundException e) { -// throw new StingException("The SNP information file accompanying the binary ped file was not found (the .bim file). "+ -// "Please ensure that it is in the same directory as the .bed and .fam files. The file we "+ -// "Were looking for was "+bimFile.getAbsolutePath(),e); -// } -// -// ArrayList variants = new ArrayList(); -// -// try { -// String line = null; -// do { -// line = reader.readLine(); -// if ( line != null ) { -// String[] snpInfo = line.split("\\s+"); -// PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true); -// variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3]))); -// variant.setAlleles(snpInfo[4],snpInfo[5]); -// variants.add(variant); -// } -// } while ( line != null ); -// } catch ( IOException e ) { -// throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e); -// } -// -// return variants; -// } -// -// private ArrayList getSampleNameOrderingFromFamFile(File famFile) { -// BufferedReader reader; -// try { -// reader = new BufferedReader( new FileReader( famFile )); -// } catch ( FileNotFoundException e) { -// throw new StingException("The Family information file accompanying the binary ped file was not found (the .fam file). "+ -// "Please ensure that it is in the same directory as the .bed and .bim files. The file we "+ -// "Were looking for was "+famFile.getAbsolutePath(),e); -// } -// -// ArrayList sampleNames = new ArrayList(); -// -// try { -// String line = null; -// do { -// line = reader.readLine(); -// if ( line != null ) { -// sampleNames.add(line.split("\\s+")[1]); -// } -// } while ( line != null ); -// } catch (IOException e) { -// throw new StingException("There was an error reading the .fam file "+famFile.getAbsolutePath(),e); -// } -// -// return sampleNames; -// } -// -// private ArrayList getGenotypesFromBedFile(ArrayList variants, ArrayList samples, File bedFile) { -// FileInputStream inStream; -// try { -// inStream = new FileInputStream(bedFile); -// } catch (FileNotFoundException e) { -// throw new StingException("The Binary pedigree file file accompanying the family file was not found (the .bed file). "+ -// "Please ensure that it is in the same directory as the .bim and .fam files. The file we "+ -// "Were looking for was "+bedFile.getAbsolutePath(),e); -// } -// -// try { -// byte genotype = -1; -// long bytesRead = 0; -// int snpOffset = 0; -// int sampleOffset = 0; -// boolean snpMajorMode = true; -// do { -// genotype = (byte) inStream.read(); -// bytesRead++; -// if ( genotype != -1 ) { -// if ( bytesRead > 3 ) { -// addGenotypeByte(genotype,variants,samples,snpOffset,sampleOffset, snpMajorMode); -// -// if ( snpMajorMode ) { -// sampleOffset = sampleOffset + 4; -// if ( sampleOffset > samples.size() -1 ) { -// snpOffset ++; -// sampleOffset = 0; -// } -// } else { -// snpOffset = snpOffset + 4; -// if ( snpOffset > variants.size() -1 ) { -// sampleOffset ++; -// snpOffset = 0; -// } -// } -// -// } else { -// if ( bytesRead == 3) { -// snpMajorMode = genotype == SNP_MAJOR_MODE; -// } -// } -// } -// } while ( genotype != -1 ); -// } catch ( IOException e) { -// throw new StingException("Error reading binary ped file "+bedFile.getAbsolutePath(), e); -// } -// -// return variants; -// } -// -// private void addGenotypeByte(byte genotype, ArrayList variants, ArrayList sampleNames, int snpOffset, int sampleOffset, boolean major) { -// // four genotypes encoded in this byte -// int[] genotypes = parseGenotypes(genotype); -// for ( int g : genotypes ) { -// variants.get(snpOffset).addBinaryGenotypeEntry(g,sampleNames.get(sampleOffset)); -// -// if ( major ) { -// sampleOffset++; -// if ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4 -// return; -// } -// } else { -// snpOffset++; -// if( snpOffset > variants.size()-1 ) { -// return; -// } -// } -// } -// } -// -// private int[] parseGenotypes(byte genotype) { -// int[] genotypes = new int[4]; -// genotypes[0] = ( genotype & 3 ); -// genotypes[1] = ( ( genotype & 12 ) >>> 2 ); -// genotypes[2] = ( ( genotype & 48 ) >>> 4 ); -// genotypes[3] = ( ( genotype & 192 ) >>> 6 ); -// return genotypes; -// } -//} -// -//class PlinkVariantInfo implements Comparable { -// private String variantName; -// private GenomeLoc loc; -// private ArrayList genotypes = new ArrayList(); -// private ArrayList sampleNames = new ArrayList(); -// -// private ArrayList indelHolder; -// private ArrayList sampleHolder; -// private int siteIndelLength; -// private Allele.AlleleType indelType; -// -// // for binary parsing -// -// private String locAllele1; -// private String locAllele2; -// -// public GenomeLoc getLocation() { -// return loc; -// } -// -// public String getName() { -// return variantName; -// } -// -// public ArrayList getSampleNames() { -// return sampleNames; -// } -// -// public ArrayList getGenotypes() { -// return genotypes; -// } -// -// public boolean isSNP() { -// return this.indelType == null; -// } -// -// public void setGenomeLoc(GenomeLoc loc) { -// this.loc = loc; -// } -// -// public void setAlleles(String al1, String al2) { -// if ( al1.equals("0") ) { -// // encoding for a site at which no variants were detected -// locAllele1 = al2; -// } else { -// locAllele1 = al1; -// } -// locAllele2 = al2; -// if ( ! isSNP() ) { -// siteIndelLength = Math.max(locAllele1.length(),locAllele2.length()); -// } -// -// } -// -// // CONSTRUCTOR -// -// public PlinkVariantInfo(String variantName) { -// this.variantName = variantName; -// parseName(); -// } -// -// public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) { -// this.variantName = variantName; -// if ( onlyLookForIndelInfo ) { -// parseNameForIndels(); -// } else { -// parseName(); -// } -// } -// -// private void parseName() { -// String chrom = this.variantName.split("\\|c")[1].split("_")[0]; -// String pos = this.variantName.split("_p")[1].split("_")[0]; -// this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos); -// this.parseNameForIndels(); -// } -// -// private void parseNameForIndels() { -// if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) { -// this.instantiateIndel(); -// } -// } -// -// private void instantiateIndel() { -// this.indelHolder = new ArrayList(); -// this.sampleHolder = new ArrayList(); -// this.siteIndelLength = -1; -// this.indelType = this.variantName.contains("_gI") ? Allele.AlleleType.INSERTION : Allele.AlleleType.DELETION; -// } -// -// public void addGenotypeEntry(String genotypeString, String sampleName) { -// // identify if we're dealing with a deletion -// if ( this.isSNP() ) { -// this.addSNP(genotypeString.split("\\s+"),sampleName); -// } else { -// this.addIndel(genotypeString.split("\\s+"),sampleName); -// } -// } -// -// public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) { -// String[] alleleStr = new String[2]; -// if ( genoTYPE == 0 ) { -// alleleStr[0] = locAllele1; -// alleleStr[1] = locAllele1; -// } else if (genoTYPE == 2) { -// alleleStr[0] = locAllele1; -// alleleStr[1] = locAllele2; -// } else if (genoTYPE == 3 ) { -// alleleStr[0] = locAllele2; -// alleleStr[1] = locAllele2; -// } else { -// alleleStr[0] = "0"; -// alleleStr[1] = "0"; -// } -// -// if ( this.isSNP() ) { -// this.addSNP(alleleStr,sampleName); -// } else { -// this.addIndel(alleleStr,sampleName); -// } -// } -// -// private void addSNP(String[] alleleStrings, String sampleName) { -// ArrayList alleles = new ArrayList(2); -// -// for ( String alStr : alleleStrings ) { -// alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE,alStr)); -// } -// -// genotypes.add(new Genotype(alleles,sampleName,20.0) ); -// sampleNames.add(sampleName); -// } -// -// private void addIndel(String[] alleleStrings, String sampleName) { -// String alleleStr1 = alleleStrings[0]; -// String alleleStr2 = alleleStrings[1]; -// if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) { -// // heterozygous indel -// if ( alleleStr1.contains("-") ) { -// this.addHetIndel(alleleStr2,sampleName) ; -// } else { -// this.addHetIndel(alleleStr1,sampleName); -// } -// } else { -// this.addHomIndel(alleleStr1, alleleStr2, sampleName); -// } -// } -// -// private void addHetIndel(String baseStr, String sampleName) { -// Allele ref; -// Allele alt; -// -// if ( indelType == Allele.AlleleType.INSERTION ) { -// ref = new Allele(Allele.AlleleType.REFERENCE,""); -// alt = new Allele(indelType,baseStr); -// } else { -// alt = new Allele(indelType,""); -// ref = new Allele(Allele.AlleleType.DELETION_REFERENCE,baseStr); -// } -// -// this.setIndelLength(alt,baseStr.length()); -// -// if ( ! indelHolder.isEmpty() ) { -// siteIndelLength = baseStr.length(); -// this.addHeldIndels(); -// } -// -// Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0); -// this.setIndelGenotypeLength(indel,siteIndelLength); -// this.genotypes.add(indel); -// this.sampleNames.add(sampleName); -// } -// -// private void addHomIndel(String strand1, String strand2, String sampleName) { -// Allele allele1; -// Allele allele2; -// boolean reference; -// if ( indelType == Allele.AlleleType.DELETION ) { -// if ( strand1.contains("-") ) { -// // homozygous deletion -// allele1 = new Allele(indelType,""); -// allele2 = new Allele(indelType,""); -// reference = false; -// } else { -// allele1 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand1); -// allele2 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand2); -// reference = true; -// } -// } else { -// if ( strand1.contains("-") ) { -// // homozygous reference -// allele1 = new Allele(Allele.AlleleType.REFERENCE,""); -// allele2 = new Allele(Allele.AlleleType.REFERENCE,""); -// reference = true; -// } else { -// allele1 = new Allele(indelType,strand1); -// allele2 = new Allele(indelType,strand2); -// reference = false; -// } -// } -// -// if ( reference ) { -// if ( ! indelHolder.isEmpty() ) { -// siteIndelLength = strand1.length(); -// this.addHeldIndels(); -// } -// } -// -// if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site -// Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0); -// setIndelGenotypeLength(gen,siteIndelLength); -// this.genotypes.add(gen); -// this.sampleNames.add(sampleName); -// } else { // hold on the variants until we *do* know the in/del length at this site -// this.indelHolder.add(allele1); -// this.indelHolder.add(allele2); -// this.sampleHolder.add(sampleName); -// } -// -// } -// -// private void setIndelGenotypeLength(Genotype g, int length) { -// g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length); -// } -// -// private void addHeldIndels() { -// Allele del1; -// Allele del2; -// int startingSize = indelHolder.size(); -// for ( int i = 0; i < startingSize ; i+=2 ) { -// del1 = indelHolder.get(i); -// del2 = indelHolder.get(i+1); -// this.addIndelFromCache(del1,del2,sampleHolder.get(i/2)); -// if ( indelHolder.size() != startingSize ) { -// throw new StingException("Halting algorithm -- possible infinite loop"); -// } -// } -// indelHolder.clear(); -// sampleHolder.clear(); -// } -// -// private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) { -// this.setIndelLength(indel1,siteIndelLength); -// this.setIndelLength(indel2,siteIndelLength); -// Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0); -// this.setIndelGenotypeLength(indel,siteIndelLength); -// this.genotypes.add(indel); -// this.sampleNames.add(sampleName); -// } -// -// public int compareTo(Object obj) { -// if ( ! ( obj instanceof PlinkVariantInfo) ) { -// return 1; -// } -// -// return loc.compareTo(((PlinkVariantInfo) obj).getLocation()); -// } -// -// private void setIndelLength(Allele al, int length) { -// // Todo -- once alleles support deletion lengths add that information -// // Todo -- into the object; for now this can just return -// return; -// } -//} -// -//class PlinkBinaryTrifecta { -// -// public PlinkBinaryTrifecta() { -// -// } -// -// public File bedFile; -// public File bimFile; -// public File famFile; -// -//} \ No newline at end of file +package org.broadinstitute.sting.gatk.refdata; +/* +import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.CommandLineGATK; + +import java.io.*; +import java.util.*; + +import net.sf.samtools.SAMFileHeader; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Jan 19, 2010 + * Time: 10:24:18 AM + * To change this template use File | Settings | File Templates. + * +public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum { + private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex", + "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE")); + private final byte SNP_MAJOR_MODE = 1; + + private ArrayList variants; + private PlinkVariantInfo currentVariant; + private ListIterator variantIterator; + + private PlinkFileType plinkFileType; + + public enum PlinkFileType { + STANDARD_PED,RAW_PED,BINARY_PED + } + +// CONSTRUCTOR + + public PlinkRod(String name) { + super(name); + } + + @Override + public Object initialize(final File plinkFile) throws FileNotFoundException { + if ( ! plinkFile.exists() ) { + throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist."); + } + + variants = parsePlinkFile(plinkFile); + if ( variants != null ) { + variantIterator = variants.listIterator(); + currentVariant = variantIterator.next(); + } + + assertNotNull(); + + return null; + } + + private void assertNotNull() { + if ( currentVariant == null ) { + throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" ); + } + } + + @Override + public boolean parseLine(Object obj, String[] args) { + if ( variantIterator.hasNext() ) { + currentVariant = variantIterator.next(); + return true; + } else { + return false; + } + } + + @Override + public GenomeLoc getLocation() { + return currentVariant.getLocation(); + } + + @Override + public String toString() { + return currentVariant == null ? "" : currentVariant.toString(); + } + + public String getVariantName() { + return currentVariant.getName(); + } + + public ArrayList getVariantSampleNames() { + return currentVariant.getSampleNames(); + } + + public ArrayList getGenotypes() { + return currentVariant.getGenotypes(); + } + + public boolean variantIsSNP() { + return currentVariant.isSNP(); + } + + +// AM I PARSING A TEXT OR A BINARY FILE ?? + + private ArrayList parsePlinkFile(File file) { + String[] splitFileName = file.getName().split("\\."); + String extension = splitFileName[splitFileName.length-1]; + if ( extension.equals("ped") || extension.equals("raw") ) { + return parseTextFormattedPlinkFile(file); + } else if ( extension.equals("bed") || extension.equals("bim") || extension.equals("fam") ) { + plinkFileType = PlinkFileType.BINARY_PED; + return parseBinaryFormattedPlinkFile(file); + } else { + System.out.println("Warning -- Plink file does not have a standard extension (ped/raw for text, bed/bim/fam for binary) -- assuming ped format"); + return parseTextFormattedPlinkFile(file); + } + + } + + /* *** *** *** *** *** ** *** ** *** ** *** ** *** ** *** + * * PARSING STANDARD TEXT PED FILES * ** + * *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/ +/* + private ArrayList parseTextFormattedPlinkFile( File file ) { + try { + BufferedReader reader = new BufferedReader( new FileReader ( file ) ); + String header = reader.readLine(); + ArrayList seqVars = instantiateVariantListFromHeader(header); + ArrayList snpOffsets = getSNPOffsetsFromHeader(header); + + String line = null; + do { + line = reader.readLine(); + incorporateInfo(seqVars,snpOffsets,line); + } while ( line != null ); + + + java.util.Collections.sort(seqVars); // because the comparable uses the GenomeLoc comparable; these + // are sorted in standard reference order + + return seqVars; + + } catch ( FileNotFoundException e ) { + throw new StingException("File "+file.getAbsolutePath()+" could not be found. This was checked earlier. Should never happen.",e); + } catch ( IOException e ) { + throw new StingException("Error reading file "+file.getAbsolutePath()+".",e); + } + } + + private void incorporateInfo(List vars, List offsets, String plinkLine) { + if ( plinkLine == null ) { + return; + } + + String[] plinkInfo; + if ( plinkFileType == PlinkFileType.STANDARD_PED) { + plinkInfo = plinkLine.split("\t"); + } else { + throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); + } + + String individualName = plinkInfo[1]; + + int snpNumber = 0; + + if ( plinkFileType == PlinkFileType.STANDARD_PED ) { + for ( int i : offsets ) { + vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName); + snpNumber++; + } + } + } + + private ArrayList instantiateVariantListFromHeader(String header) { + if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing + plinkFileType = PlinkFileType.STANDARD_PED; + } else {// first line is the raw header; comes from de-binary-ing a .bed file + plinkFileType = PlinkFileType.RAW_PED; + throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); + } + + ArrayList seqVars = new ArrayList(); + String[] headerFields; + + if ( plinkFileType == PlinkFileType.STANDARD_PED ) { + headerFields = header.split("\t"); + } else { + throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); + } + + for ( String field : headerFields ) { + if ( ! headerEntries.contains(field) ) { + // not a standard header, so a variant + seqVars.add(new PlinkVariantInfo(field)); + } + } + + return seqVars; + } + + private ArrayList getSNPOffsetsFromHeader(String header) { + ArrayList offsets = new ArrayList(); + String[] headerFields; + if ( plinkFileType == PlinkFileType.STANDARD_PED ) { + headerFields = header.split("\t+"); + } else { + headerFields = header.split("\\s+"); + } + + int offset = 0; + + for ( String field : headerFields ) { + if ( ! headerEntries.contains(field) ) { + offsets.add(offset); + } + offset++; + } + + return offsets; + } + + /* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** + * * PARSING BINARY PLINK BED/BIM/FAM FILES * * + * *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/ + /* + private ArrayList parseBinaryFormattedPlinkFile(File file) { + PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file); + ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile); + ArrayList sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile); + ArrayList updatedVariants = getGenotypesFromBedFile(parsedVariants,sampleNames,binaryFiles.bedFile); + + java.util.Collections.sort(updatedVariants); + + return updatedVariants; + } + + private PlinkBinaryTrifecta getPlinkBinaryTriplet(File file) { + // just gonna parse the name + PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta(); + String absolute_path = file.getAbsolutePath(); + String[] directory_tree = absolute_path.split("/"); + String file_name = directory_tree[directory_tree.length-1].split("\\.")[0]; + StringBuilder pathBuilder = new StringBuilder(); + for ( int i = 0; i < directory_tree.length - 1; i ++ ) { + pathBuilder.append(String.format("%s/",directory_tree[i])); + } + String path = pathBuilder.toString(); + trifecta.bedFile = new File(path+file_name+".bed"); + trifecta.bimFile = new File(path+file_name+".bim"); + trifecta.famFile = new File(path+file_name+".fam"); + + return trifecta; + + } + + private ArrayList instantiateVariantsFromBimFile(File bimFile) { + BufferedReader reader; + try { + reader = new BufferedReader( new FileReader( bimFile )); + } catch ( FileNotFoundException e) { + throw new StingException("The SNP information file accompanying the binary ped file was not found (the .bim file). "+ + "Please ensure that it is in the same directory as the .bed and .fam files. The file we "+ + "Were looking for was "+bimFile.getAbsolutePath(),e); + } + + ArrayList variants = new ArrayList(); + + try { + String line = null; + do { + line = reader.readLine(); + if ( line != null ) { + String[] snpInfo = line.split("\\s+"); + PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true); + variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3]))); + variant.setAlleles(snpInfo[4],snpInfo[5]); + variants.add(variant); + } + } while ( line != null ); + } catch ( IOException e ) { + throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e); + } + + return variants; + } + + private ArrayList getSampleNameOrderingFromFamFile(File famFile) { + BufferedReader reader; + try { + reader = new BufferedReader( new FileReader( famFile )); + } catch ( FileNotFoundException e) { + throw new StingException("The Family information file accompanying the binary ped file was not found (the .fam file). "+ + "Please ensure that it is in the same directory as the .bed and .bim files. The file we "+ + "Were looking for was "+famFile.getAbsolutePath(),e); + } + + ArrayList sampleNames = new ArrayList(); + + try { + String line = null; + do { + line = reader.readLine(); + if ( line != null ) { + sampleNames.add(line.split("\\s+")[1]); + } + } while ( line != null ); + } catch (IOException e) { + throw new StingException("There was an error reading the .fam file "+famFile.getAbsolutePath(),e); + } + + return sampleNames; + } + + private ArrayList getGenotypesFromBedFile(ArrayList variants, ArrayList samples, File bedFile) { + FileInputStream inStream; + try { + inStream = new FileInputStream(bedFile); + } catch (FileNotFoundException e) { + throw new StingException("The Binary pedigree file file accompanying the family file was not found (the .bed file). "+ + "Please ensure that it is in the same directory as the .bim and .fam files. The file we "+ + "Were looking for was "+bedFile.getAbsolutePath(),e); + } + + try { + byte genotype = -1; + long bytesRead = 0; + int snpOffset = 0; + int sampleOffset = 0; + boolean snpMajorMode = true; + do { + genotype = (byte) inStream.read(); + bytesRead++; + if ( genotype != -1 ) { + if ( bytesRead > 3 ) { + addGenotypeByte(genotype,variants,samples,snpOffset,sampleOffset, snpMajorMode); + + if ( snpMajorMode ) { + sampleOffset = sampleOffset + 4; + if ( sampleOffset > samples.size() -1 ) { + snpOffset ++; + sampleOffset = 0; + } + } else { + snpOffset = snpOffset + 4; + if ( snpOffset > variants.size() -1 ) { + sampleOffset ++; + snpOffset = 0; + } + } + + } else { + if ( bytesRead == 3) { + snpMajorMode = genotype == SNP_MAJOR_MODE; + } + } + } + } while ( genotype != -1 ); + } catch ( IOException e) { + throw new StingException("Error reading binary ped file "+bedFile.getAbsolutePath(), e); + } + + return variants; + } + + private void addGenotypeByte(byte genotype, ArrayList variants, ArrayList sampleNames, int snpOffset, int sampleOffset, boolean major) { + // four genotypes encoded in this byte + int[] genotypes = parseGenotypes(genotype); + for ( int g : genotypes ) { + variants.get(snpOffset).addBinaryGenotypeEntry(g,sampleNames.get(sampleOffset)); + + if ( major ) { + sampleOffset++; + if ( sampleOffset > sampleNames.size()-1 ) {// using offsets for comparison; size 5 == offset 4 + return; + } + } else { + snpOffset++; + if( snpOffset > variants.size()-1 ) { + return; + } + } + } + } + + private int[] parseGenotypes(byte genotype) { + int[] genotypes = new int[4]; + genotypes[0] = ( genotype & 3 ); + genotypes[1] = ( ( genotype & 12 ) >>> 2 ); + genotypes[2] = ( ( genotype & 48 ) >>> 4 ); + genotypes[3] = ( ( genotype & 192 ) >>> 6 ); + return genotypes; + } +} + +class PlinkVariantInfo implements Comparable { + + private enum AlleleType { + INSERTION,DELETION + } + + private String variantName; + private GenomeLoc loc; + private ArrayList genotypes = new ArrayList(); + private ArrayList sampleNames = new ArrayList(); + + private ArrayList indelHolder; + private ArrayList sampleHolder; + private int siteIndelLength; + private AlleleType indelType; + + // for binary parsing + + private String locAllele1; + private String locAllele2; + + public GenomeLoc getLocation() { + return loc; + } + + public String getName() { + return variantName; + } + + public ArrayList getSampleNames() { + return sampleNames; + } + + public ArrayList getGenotypes() { + return genotypes; + } + + public boolean isSNP() { + return this.indelType == null; + } + + public void setGenomeLoc(GenomeLoc loc) { + this.loc = loc; + } + + public void setAlleles(String al1, String al2) { + if ( al1.equals("0") ) { + // encoding for a site at which no variants were detected + locAllele1 = al2; + } else { + locAllele1 = al1; + } + locAllele2 = al2; + if ( ! isSNP() ) { + siteIndelLength = Math.max(locAllele1.length(),locAllele2.length()); + } + + } + + // CONSTRUCTOR + + public PlinkVariantInfo(String variantName) { + this.variantName = variantName; + parseName(); + } + + public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) { + this.variantName = variantName; + if ( onlyLookForIndelInfo ) { + parseNameForIndels(); + } else { + parseName(); + } + } + + private void parseName() { + String chrom = this.variantName.split("\\|c")[1].split("_")[0]; + String pos = this.variantName.split("_p")[1].split("_")[0]; + this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos); + this.parseNameForIndels(); + } + + private void parseNameForIndels() { + if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) { + this.instantiateIndel(); + } + } + + private void instantiateIndel() { + this.indelHolder = new ArrayList(); + this.sampleHolder = new ArrayList(); + this.siteIndelLength = -1; + this.indelType = this.variantName.contains("_gI") ? AlleleType.INSERTION : AlleleType.DELETION; + } + + public void addGenotypeEntry(String genotypeString, String sampleName) { + // identify if we're dealing with a deletion + if ( this.isSNP() ) { + this.addSNP(genotypeString.split("\\s+"),sampleName); + } else { + this.addIndel(genotypeString.split("\\s+"),sampleName); + } + } + + public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) { + String[] alleleStr = new String[2]; + if ( genoTYPE == 0 ) { + alleleStr[0] = locAllele1; + alleleStr[1] = locAllele1; + } else if (genoTYPE == 2) { + alleleStr[0] = locAllele1; + alleleStr[1] = locAllele2; + } else if (genoTYPE == 3 ) { + alleleStr[0] = locAllele2; + alleleStr[1] = locAllele2; + } else { + alleleStr[0] = "0"; + alleleStr[1] = "0"; + } + + if ( this.isSNP() ) { + this.addSNP(alleleStr,sampleName); + } else { + this.addIndel(alleleStr,sampleName); + } + } + + private void addSNP(String[] alleleStrings, String sampleName) { + ArrayList alleles = new ArrayList(2); + + for ( String alStr : alleleStrings ) { + alleles.add(new Allele(alStr.getBytes())); + } + + genotypes.add(new Genotype(alleles,sampleName,20.0) ); + sampleNames.add(sampleName); + } + + private void addIndel(String[] alleleStrings, String sampleName) { + String alleleStr1 = alleleStrings[0]; + String alleleStr2 = alleleStrings[1]; + if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) { + // heterozygous indel + if ( alleleStr1.contains("-") ) { + this.addHetIndel(alleleStr2,sampleName) ; + } else { + this.addHetIndel(alleleStr1,sampleName); + } + } else { + this.addHomIndel(alleleStr1, alleleStr2, sampleName); + } + } + + private void addHetIndel(String baseStr, String sampleName) { + Allele ref; + Allele alt; + + if ( indelType == AlleleType.INSERTION ) { + ref = new Allele("-",true); + alt = new Allele(baseStr.getBytes(),false); + } else { + alt = new Allele("-",false); + ref = new Allele(baseStr.getBytes(),true); + } + + // this.setIndelLength(alt,baseStr.length()); + + if ( ! indelHolder.isEmpty() ) { + siteIndelLength = baseStr.length(); + this.addHeldIndels(); + } + + Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0); + // this.setIndelGenotypeLength(indel,siteIndelLength); + this.genotypes.add(indel); + this.sampleNames.add(sampleName); + } + + private void addHomIndel(String strand1, String strand2, String sampleName) { + Allele allele1; + Allele allele2; + boolean reference; + if ( indelType == AlleleType.DELETION ) { + if ( strand1.contains("-") ) { + // homozygous deletion + allele1 = new Allele("-",false); + allele2 = new Allele("-",false); + reference = false; + } else { // homozygous reference at a deletion variant site + allele1 = new Allele(strand1.getBytes(),true); + allele2 = new Allele(strand2.getBytes(),true); + reference = true; + } + } else { + if ( strand1.contains("-") ) { + // homozygous reference + allele1 = new Allele("-",true); + allele2 = new Allele("-",true); + reference = true; + } else { + allele1 = new Allele(strand1.getBytes(),false); + allele2 = new Allele(strand2.getBytes(),false); + reference = false; + } + } + + if ( reference ) { + if ( ! indelHolder.isEmpty() ) { + siteIndelLength = strand1.length(); + this.addHeldIndels(); + } + } + + if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site + Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0); + // setIndelGenotypeLength(gen,siteIndelLength); + this.genotypes.add(gen); + this.sampleNames.add(sampleName); + } else { // hold on the variants until we *do* know the in/del length at this site + this.indelHolder.add(allele1); + this.indelHolder.add(allele2); + this.sampleHolder.add(sampleName); + } + + } + +/* private void setIndelGenotypeLength(Genotype g, int length) { + g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length); + } + + private void addHeldIndels() { + Allele del1; + Allele del2; + int startingSize = indelHolder.size(); + for ( int i = 0; i < startingSize ; i+=2 ) { + del1 = indelHolder.get(i); + del2 = indelHolder.get(i+1); + this.addIndelFromCache(del1,del2,sampleHolder.get(i/2)); + if ( indelHolder.size() != startingSize ) { + throw new StingException("Halting algorithm -- possible infinite loop"); + } + } + indelHolder.clear(); + sampleHolder.clear(); + } + + private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) { + this.setIndelLength(indel1,siteIndelLength); + this.setIndelLength(indel2,siteIndelLength); + Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0); + // this.setIndelGenotypeLength(indel,siteIndelLength); + this.genotypes.add(indel); + this.sampleNames.add(sampleName); + } + + public int compareTo(Object obj) { + if ( ! ( obj instanceof PlinkVariantInfo) ) { + return 1; + } + + return loc.compareTo(((PlinkVariantInfo) obj).getLocation()); + } + + private void setIndelLength(Allele al, int length) { + // Todo -- once alleles support deletion lengths add that information + // Todo -- into the object; for now this can just return + return; + } +} + +class PlinkBinaryTrifecta { + + public PlinkBinaryTrifecta() { + + } + + public File bedFile; + public File bimFile; + public File famFile; + +} */ \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java index 2db84b5a9..89b56887d 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java @@ -6,6 +6,8 @@ import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; +import java.util.Arrays; +import java.util.HashMap; import java.util.Random; /** @@ -17,7 +19,7 @@ import java.util.Random; * * @author Kiran Garimella */ -public class ReadErrorRateWalker extends ReadWalker { +public class ReadErrorRateWalker extends ReadWalker { @Argument(fullName="printVisualHits", shortName="v", doc="print visual hits", required=false) public boolean printVisualHits = false; @Argument(fullName="useNextBestBase", shortName="nb", doc="use next best base", required=false) public boolean useNextBestBase = false; @Argument(fullName="useNonNextBestBase",shortName="nnb",doc="use nonnext best base",required=false) public boolean useNonNextBestBase = false; @@ -134,38 +136,80 @@ public class ReadErrorRateWalker extends ReadWalker { * * @return null */ - public int[] reduceInit() { - return null; + public ReadErrorRateCollection reduceInit() { + return new ReadErrorRateCollection(); } /** * Summarize the error rate data. * * @param value the read mismatch array - * @param sum the summed mismatch array + * @param collection the summed mismatch array * @return the summed mismatch array with the new read mismatch array added */ - public int[] reduce(boolean[] value, int[] sum) { - if (sum == null) { - sum = new int[value.length]; - } + public ReadErrorRateCollection reduce(boolean[] value, ReadErrorRateCollection collection) { - for (int cycle = 0; cycle < value.length; cycle++) { - sum[cycle] += (value[cycle] ? 1 : 0); - } + collection.update(value); - return sum; + return collection; } /** * We've processed all the reads. Emit the final, normalized error rate data. * - * @param sum the summed mismatch array + * @param collection the summed mismatch arrays */ - public void onTraversalDone(int[] sum) { - for (int cycle = 0; cycle < sum.length - 1; cycle++) { - double errorrate = ((double) sum[cycle])/((double) sum[sum.length - 1]); - out.println("[ERROR_RATE] " + cycle + " " + errorrate); - } + public void onTraversalDone(ReadErrorRateCollection collection) { + + out.print(collection.toString()); } } + +class ReadErrorRateCollection { + private HashMap readsByReadLength; + + public ReadErrorRateCollection() { + readsByReadLength = new HashMap(); + } + + public void update(boolean[] mismatchArray) { + if ( ! readsByReadLength.containsKey(mismatchArray.length) ) { + readsByReadLength.put(mismatchArray.length, zeroArray(mismatchArray.length)); + } + + updateErrorCounts(readsByReadLength.get(mismatchArray.length), mismatchArray); + } + + public String toString() { + StringBuilder builder = new StringBuilder(); + for ( int length : readsByReadLength.keySet() ) { + for ( int cycle = 0; cycle < length-1; cycle++) { + int[] counts = readsByReadLength.get(length); + builder.append(length); + builder.append("\t"); + builder.append(cycle); + builder.append("\t"); + builder.append( ( ( double ) counts[cycle] / ( (double) counts[length-1]))); + builder.append("\n"); + } + } + return builder.toString(); + } + + private static int[] zeroArray( int length ) { + int[] array = new int[length]; + for ( int ii = 0; ii < length; ii ++ ) { + array[ii] = 0; + } + + return array; + } + + public static void updateErrorCounts(int[] sum, boolean[] value) { + + for (int cycle = 0; cycle < value.length; cycle++) { + sum[cycle] += (value[cycle] ? 1 : 0); + } + + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java index ccc53d33b..05dbbf72d 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceSet.java @@ -22,11 +22,6 @@ class MultiSampleConcordanceSet { private long truthSitesFilteredOut; private int genotypeQuality; - private int truePositiveLoci; - private int trueNegativeLoci; - private int falsePositiveLoci; - private int falseNegativeLoci; - public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) { concordanceSet = new HashSet(); truthOnlySites = 0l; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java index 454778d91..a838510ad 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java @@ -58,35 +58,58 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf ReferenceOrderedDatum truthData = tracker.lookup("truth", null); ReferenceOrderedDatum variantData = tracker.lookup("variants",null); LocusConcordanceInfo concordance; + if ( truthData == null && variantData == null) { + concordance = null; + } else if ( truthData == null ) { + // not in the truth set if ( ( (RodVCF) variantData ).isFiltered() ) { + concordance = null; + } else { + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref); } + } else if ( variantData == null ) { + // not in the variant set if ( ( (RodVCF) truthData).isFiltered() ) { + concordance = null; + } else { + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref); } + } else { + // in both // check for filtering boolean truth_filter = ((RodVCF) truthData).isFiltered(); boolean call_filter = ((RodVCF) variantData).isFiltered(); + if ( truth_filter && call_filter ) { + concordance = null; + } else if ( truth_filter ) { + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref); + } else if ( call_filter ) { + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref); + } else { + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java index 509abfbbc..b259b994b 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java @@ -1,243 +1,247 @@ -//package org.broadinstitute.sting.gatk.refdata; -// -//import org.broadinstitute.sting.BaseTest; -//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -//import org.broadinstitute.sting.utils.StingException; -//import org.broadinstitute.sting.utils.GenomeLocParser; -//import org.broadinstitute.sting.utils.GenomeLoc; -//import org.junit.BeforeClass; -//import org.junit.Test; -//import org.junit.Assert; -// -//import java.io.File; -//import java.io.FileNotFoundException; -//import java.io.BufferedReader; -//import java.io.FileReader; -//import java.util.*; -// -///** -// * Created by IntelliJ IDEA. -// * User: Ghost -// * Date: Jan 22, 2010 -// * Time: 11:27:33 PM -// * To change this template use File | Settings | File Templates. -// */ -//public class PlinkRodTest extends BaseTest { -// private static IndexedFastaSequenceFile seq; -// -// @BeforeClass -// public static void beforeTests() { -// try { -// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); -// } catch (FileNotFoundException e) { -// throw new StingException("unable to load the sequence dictionary"); -// } -// GenomeLocParser.setupRefContigOrdering(seq); -// -// } -// -// public BufferedReader openFile(String filename) { -// try { -// return new BufferedReader(new FileReader(filename)); -// } catch (FileNotFoundException e) { -// throw new StingException("Couldn't open file " + filename); -// } -// -// } -// -// @Test -// public void testStandardPedFile() { -// PlinkRod rod = new PlinkRod("test"); -// try { -// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") ); -// } catch ( FileNotFoundException e ) { -// throw new StingException("test file for testStandardPedFile() does not exist",e); -// } -// -// // test that the sample names are correct -// -// List rodNames = rod.getVariantSampleNames(); -// List expectedNames = Arrays.asList("NA12887","NAMELY","COWBA"); -// -// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size()); -// -// boolean namesCorrect = true; -// for ( int i = 0; i < expectedNames.size(); i++ ) { -// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) ); -// } -// -// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect); -// -// // test that rod can be iterated over -// -// ArrayList> genotypesInRod = new ArrayList>(); -// ArrayList> sampleNamesInRod = new ArrayList>(); -// ArrayList lociInRod = new ArrayList(); -// do { -// genotypesInRod.add(rod.getGenotypes()); -// sampleNamesInRod.add(rod.getVariantSampleNames()); -// lociInRod.add(rod.getLocation()); -// } while ( rod.parseLine(null,null) ); -// -// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size()); -// -// ArrayList snp1 = genotypesInRod.get(0); -// ArrayList snp3 = genotypesInRod.get(2); -// -// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size()); -// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size()); -// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size()); -// -// -// List snp1_individual3_alleles = snp1.get(2).getAlleles(); -// List snp3_individual2_alleles = snp3.get(1).getAlleles(); -// -// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases(); -// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases(); -// -// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1); -// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2); -// -// boolean name2isSame = true; -// -// for ( ArrayList names : sampleNamesInRod ) { -// name2isSame = name2isSame && names.get(1).equals("NAMELY"); -// } -// -// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame); -// -// // test that the loci are correctly parsed and in order -// -// List expectedLoci = Arrays.asList("1:123456","2:13274","3:11111"); -// boolean lociCorrect = true; -// for ( int i = 0; i < 3; i ++ ) { -// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i)); -// } -// } -// -// @Test -// public void testStandardPedFileWithIndels() { -// PlinkRod rod = new PlinkRod("test"); -// try { -// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") ); -// } catch ( FileNotFoundException e) { -// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e); -// } -// -// // Iterate through the rod -// -// List> genotypesInRod = new ArrayList>(); -// ArrayList> sampleNamesInRod = new ArrayList>(); -// ArrayList lociInRod = new ArrayList(); -// ArrayList snpSites = new ArrayList(); -// do { -// genotypesInRod.add(rod.getGenotypes()); -// sampleNamesInRod.add(rod.getVariantSampleNames()); -// lociInRod.add(rod.getLocation()); -// snpSites.add(rod.variantIsSNP()); -// } while ( rod.parseLine(null,null) ); -// -// boolean snpOrder = true; -// List expectedOrder = Arrays.asList(true,false,true,false); -// for ( int i = 0; i < 4; i ++ ) { -// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) ); -// } -// -// Assert.assertTrue("That the variant type order is as expected", snpOrder); -// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() ); -// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() ); -// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom()); -// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet()); -// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases()); -// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases()); -// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases()); -// } -// -// @Test -// public void testBinaryPedFileNoIndels() { -// PlinkRod rod = new PlinkRod("binaryTest1"); -// try { -// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed")); -// } catch (FileNotFoundException e) { -// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); -// } -// -// // iterate through the ROD and get stuff -// ArrayList lociInRod = new ArrayList(); -// ArrayList> genotypesInRod = new ArrayList>(); -// ArrayList> samplesInRod = new ArrayList>(); -// -// do { -// lociInRod.add(rod.getLocation()); -// genotypesInRod.add(rod.getGenotypes()); -// samplesInRod.add(rod.getVariantSampleNames()); -// } while ( rod.parseLine(null,null) ); -// -// List expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713"); -// -// for ( int i = 0; i < expecLoc.size(); i ++ ) { -// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString()); -// } -// -// List expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT", -// "GG","GG","AG","TT","CC","CT","TG","GG","GG"); -// List expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false, -// false,false,false,true,false,false,true,true,false,false); -// List expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", -// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", -// "NA12878","NA12890","NA07000"); -// int snpNo = 1; -// int indiv = 1; -// int alleleOffset = 0; -// for ( ArrayList snp : genotypesInRod ) { -// for ( Genotype gen : snp ) { -// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases(); -// Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+ -// "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr); -// Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet()); -// Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1)); -// indiv++; -// alleleOffset++; -// } -// indiv = 1; -// snpNo++; -// } -// } -// -// @Test -// public void testIndelBinary() { -// PlinkRod rod = new PlinkRod("binaryTest2"); -// try { -// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed")); -// } catch (FileNotFoundException e) { -// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); -// } -// -// ArrayList> genotypesInRod = new ArrayList>(); -// do { -// genotypesInRod.add(rod.getGenotypes()); -// } while ( rod.parseLine(null,null) ); -// -// List expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT", -// "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG"); -// List expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true, -// false,false,false,false,false,false,true,false,true,false,false,false); -// List expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false, -// false,false,false,false,false,false,false,false,false,false,false,false); -// -// int al = 0; -// for ( ArrayList snp : genotypesInRod ) { -// for ( Genotype gen : snp ) { -// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases(); -// Allele firstAl = gen.getAlleles().get(0); -// Allele secondAl = gen.getAlleles().get(1); -// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION ); -// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION ); -// Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr); -// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion); -// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion); -// al++; -// } -// } -// } -//} +package org.broadinstitute.sting.gatk.refdata; +/* +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.Assert; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.BufferedReader; +import java.io.FileReader; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Jan 22, 2010 + * Time: 11:27:33 PM + * To change this template use File | Settings | File Templates. + * +public class PlinkRodTest extends BaseTest { + // todo :: get the isIndel() isInsertion() and isDeletion() tests working again -- this may require new + // todo :: methods in the objects themselves + private static IndexedFastaSequenceFile seq; + + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); + } + GenomeLocParser.setupRefContigOrdering(seq); + + } + + public BufferedReader openFile(String filename) { + try { + return new BufferedReader(new FileReader(filename)); + } catch (FileNotFoundException e) { + throw new StingException("Couldn't open file " + filename); + } + + } + + @Test + public void testStandardPedFile() { + PlinkRod rod = new PlinkRod("test"); + try { + rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") ); + } catch ( FileNotFoundException e ) { + throw new StingException("test file for testStandardPedFile() does not exist",e); + } + + // test that the sample names are correct + + List rodNames = rod.getVariantSampleNames(); + List expectedNames = Arrays.asList("NA12887","NAMELY","COWBA"); + + Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size()); + + boolean namesCorrect = true; + for ( int i = 0; i < expectedNames.size(); i++ ) { + namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) ); + } + + Assert.assertTrue("That the names are correct and in the proper order",namesCorrect); + + // test that rod can be iterated over + + ArrayList> genotypesInRod = new ArrayList>(); + ArrayList> sampleNamesInRod = new ArrayList>(); + ArrayList lociInRod = new ArrayList(); + do { + genotypesInRod.add(rod.getGenotypes()); + sampleNamesInRod.add(rod.getVariantSampleNames()); + lociInRod.add(rod.getLocation()); + } while ( rod.parseLine(null,null) ); + + Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size()); + + ArrayList snp1 = genotypesInRod.get(0); + ArrayList snp3 = genotypesInRod.get(2); + + Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size()); + Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size()); + Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size()); + + + List snp1_individual3_alleles = snp1.get(2).getAlleles(); + List snp3_individual2_alleles = snp3.get(1).getAlleles(); + + String alleleStr1 = new String(snp1_individual3_alleles.get(0).getBases())+new String(snp1_individual3_alleles.get(1).getBases()); + String alleleStr2 = new String(snp3_individual2_alleles.get(0).getBases())+new String(snp3_individual2_alleles.get(1).getBases()); + + Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1); + Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2); + + boolean name2isSame = true; + + for ( ArrayList names : sampleNamesInRod ) { + name2isSame = name2isSame && names.get(1).equals("NAMELY"); + } + + Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame); + + // test that the loci are correctly parsed and in order + + List expectedLoci = Arrays.asList("1:123456","2:13274","3:11111"); + boolean lociCorrect = true; + for ( int i = 0; i < 3; i ++ ) { + lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i)); + } + } + + @Test + public void testStandardPedFileWithIndels() { + PlinkRod rod = new PlinkRod("test"); + try { + rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") ); + } catch ( FileNotFoundException e) { + throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e); + } + + // Iterate through the rod + + List> genotypesInRod = new ArrayList>(); + ArrayList> sampleNamesInRod = new ArrayList>(); + ArrayList lociInRod = new ArrayList(); + ArrayList snpSites = new ArrayList(); + do { + genotypesInRod.add(rod.getGenotypes()); + sampleNamesInRod.add(rod.getVariantSampleNames()); + lociInRod.add(rod.getLocation()); + snpSites.add(rod.variantIsSNP()); + } while ( rod.parseLine(null,null) ); + + boolean snpOrder = true; + List expectedOrder = Arrays.asList(true,false,true,false); + for ( int i = 0; i < 4; i ++ ) { + snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) ); + } + + Assert.assertTrue("That the variant type order is as expected", snpOrder); + // Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1). ); + // Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() ); + Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom()); + Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet()); + Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases()); + Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases()); + Assert.assertEquals("That the snp bases are correct","GC",new String(genotypesInRod.get(2).get(2).getAlleles().get(0).getBases())+new String(genotypesInRod.get(2).get(2).getAlleles().get(1).getBases())); + } + + @Test + public void testBinaryPedFileNoIndels() { + PlinkRod rod = new PlinkRod("binaryTest1"); + try { + rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed")); + } catch (FileNotFoundException e) { + throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); + } + + // iterate through the ROD and get stuff + ArrayList lociInRod = new ArrayList(); + ArrayList> genotypesInRod = new ArrayList>(); + ArrayList> samplesInRod = new ArrayList>(); + + do { + lociInRod.add(rod.getLocation()); + genotypesInRod.add(rod.getGenotypes()); + samplesInRod.add(rod.getVariantSampleNames()); + } while ( rod.parseLine(null,null) ); + + List expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713"); + + for ( int i = 0; i < expecLoc.size(); i ++ ) { + Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString()); + } + + List expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT", + "GG","GG","AG","TT","CC","CT","TG","GG","GG"); + List expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false, + false,false,false,true,false,false,true,true,false,false); + List expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", + "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000", + "NA12878","NA12890","NA07000"); + int snpNo = 1; + int indiv = 1; + int alleleOffset = 0; + for ( ArrayList snp : genotypesInRod ) { + for ( Genotype gen : snp ) { + String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases()); + Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+ + "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr); + Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet()); + Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1)); + indiv++; + alleleOffset++; + } + indiv = 1; + snpNo++; + } + } + + @Test + public void testIndelBinary() { + PlinkRod rod = new PlinkRod("binaryTest2"); + try { + rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed")); + } catch (FileNotFoundException e) { + throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e); + } + + ArrayList> genotypesInRod = new ArrayList>(); + do { + genotypesInRod.add(rod.getGenotypes()); + } while ( rod.parseLine(null,null) ); + + List expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT", + "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG"); + List expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true, + false,false,false,false,false,false,true,false,true,false,false,false); + List expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false, + false,false,false,false,false,false,false,false,false,false,false,false); + + int al = 0; + for ( ArrayList snp : genotypesInRod ) { + for ( Genotype gen : snp ) { + String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases()); + Allele firstAl = gen.getAlleles().get(0); + Allele secondAl = gen.getAlleles().get(1); + // boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION ); + // boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION ); + Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr); + // Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion); + // Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion); + al++; + } + } + } +}*/