diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java
deleted file mode 100755
index 4f832b8ae..000000000
--- a/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java
+++ /dev/null
@@ -1,315 +0,0 @@
-package org.broadinstitute.sting.gatk.contexts;
-
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.gatk.refdata.*;
-
-import java.util.*;
-import org.apache.commons.jexl.*;
-
-
-/**
- * @author ebanks
- *
- * Class VariantContext
- *
- * This class represents a context that unifies one or more variants
- */
-public class VariantContext {
-
- private static final String UNIQUIFIED_SUFFIX = ".unique";
-
- private Set alleles;
-
- private Set genotypes;
-
- private Allele reference;
-
- private GenomeLoc loc;
-
- private HashMap attributes;
-
-
- public VariantContext(VariationRod rod) {
-
- // TODO -- VariationRod should eventually require classes to implement toVariationContext()
- // TODO -- (instead of using a temporary adapter class)
-
- loc = rod.getLocation();
- reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
-
- // TODO -- populate the alleles and genotypes through an adapter
- alleles = new HashSet();
- genotypes = new HashSet();
-
- attributes = new HashMap();
- }
-
- protected VariantContext(GenomeLoc loc, Allele reference, Set alleles, Set genotypes, HashMap attributes) {
- this.loc = loc;
- this.reference = reference;
- this.alleles = new HashSet(alleles);
- this.genotypes = new HashSet(genotypes);
- this.attributes = new HashMap(attributes);
- }
-
- /**
- * @param other another variant context
- *
- * throws an exception if there is a collision such that the same sample exists in both contexts
- * @return a context representing the merge of this context and the other provided context
- */
- public VariantContext merge(VariantContext other) {
- return merge(other, false);
- }
-
- /**
- * @param other another variant context
- * @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
- * the samples will be uniquified(based on their sources);
- * otherwise, an exception will be thrown
- *
- * @return a context representing the merge of this context and the other provided context
- */
- public VariantContext merge(VariantContext other, boolean uniquifySamples) {
- if ( !loc.equals(other.getLocation()) )
- throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
-
- Set samples = getSampleNames();
- Set Gs = new HashSet(genotypes);
-
- for ( Genotype g : other.getGenotypes() ) {
- if ( samples.contains(g.getSample()) ) {
- if ( uniquifySamples )
- g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
- else
- throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
- }
- Gs.add(g);
- }
-
- HashMap attrs = new HashMap(attributes);
- attrs.putAll(other.getAttributes());
-
- return createNewContext(Gs, attrs);
- }
-
- /**
- * @return the location of this context
- */
- public GenomeLoc getLocation() { return loc; }
-
- /**
- * @return the reference allele for this context
- */
- public Allele getReference() { return reference; }
-
- /**
- * @return true if the context is variant (i.e. contains a non-reference allele)
- */
- public boolean isVariant() {
- for ( Allele allele : alleles ) {
- if ( allele.isVariant() )
- return true;
- }
- return false;
- }
-
- /**
- * @return true if the context is strictly bi-allelic
- */
- public boolean isBiallelic() {
- return getAlternateAlleles().size() == 1;
- }
-
- /**
- * @return true if the context represents point alleles only (i.e. no indels or structural variants)
- */
- public boolean isPointAllele() {
- for ( Allele allele : alleles ) {
- if ( allele.isVariant() && !allele.isSNP() )
- return false;
- }
- return true;
- }
-
- /**
- * @return the set of all sample names in this context
- */
- public Set getSampleNames() {
- Set samples = new TreeSet();
- for ( Genotype g : genotypes )
- samples.add(g.getSample());
- return samples;
- }
-
- /**
- * @return true if the context represents variants with associated genotypes
- */
- public boolean hasGenotypes() { return genotypes.size() > 0; }
-
- /**
- * @return set of all Genotypes associated with this context
- */
- public Set getGenotypes() { return genotypes; }
-
- /**
- * @param sample the sample name
- *
- * @return the Genotype associated with the given sample in this context or null if the sample is not in this context
- */
- public Genotype getGenotype(String sample) {
- for ( Genotype g : genotypes ) {
- if ( g.getSample().equals(sample) )
- return g;
- }
- return null;
- }
-
- /**
- * @return set of all subclasses within this context
- */
- public Set getSubclasses() {
- Set subclasses = new HashSet();
- for ( Genotype g : genotypes )
- subclasses.addAll(g.getAttributes().keySet());
- return subclasses;
- }
-
- /**
- * @param subclass the name of a subclass of variants to select
- *
- * @return a subset of this context which selects based on the given subclass
- */
- public VariantContext select(String subclass) {
- HashSet Gs = new HashSet();
- for ( Genotype g : genotypes ) {
- if ( g.getAttribute(subclass) != null )
- Gs.add(g);
- }
- return createNewContext(Gs, attributes);
- }
-
- /**
- * @param expr a jexl expression describing how to filter this context
- *
- * @return a subset of this context which is filtered based on the given expression
- */
- public VariantContext filter(String expr) {
- HashSet Gs = new HashSet();
- try {
- Expression filterExpression = ExpressionFactory.createExpression(expr);
-
- for ( Genotype g : genotypes ) {
- JexlContext jContext = JexlHelper.createContext();
- jContext.setVars(g.getAttributes());
- if ( (Boolean)filterExpression.evaluate(jContext) )
- Gs.add(g);
- }
-
- } catch (Exception e) {
- throw new StingException("JEXL error in VariantContext: " + e.getMessage());
- }
-
- return createNewContext(Gs, attributes);
- }
-
- /**
- * @return a set of new variant contexts, one for each sample from this context
- */
- public Set splitBySample() {
- Set contexts = new HashSet();
- for ( Genotype g : genotypes ) {
- HashSet gAsSet = new HashSet();
- gAsSet.add(g);
- contexts.add(createNewContext(gAsSet, attributes));
- }
- return contexts;
- }
-
- /**
- * @param Gs the set of genotypes from which to create a new context
- * @param attrs the attributes for the new context
- *
- * @return a new context based on the given genotypes
- */
- private VariantContext createNewContext(Set Gs, HashMap attrs) {
- HashSet As = new HashSet();
- for ( Genotype g : Gs )
- As.addAll(g.getAlleles());
- return new VariantContext(loc, reference, As, Gs, attrs);
- }
-
- /**
- * @param allele the allele to be queried
- *
- * @return the frequency of the given allele in this context
- */
- public double getAlleleFrequency(Allele allele) {
-
- int alleleCount = 0;
- int totalCount = 0;
-
- for ( Genotype g : genotypes ) {
- for ( Allele a : g.getAlleles() ) {
- totalCount++;
- if ( allele.equals(a) )
- alleleCount++;
- }
- }
-
- return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
- }
-
- /**
- * Gets the alleles. This method should return all of the alleles present at the location,
- * including the reference allele. There are no constraints imposed on the ordering of alleles
- * in the set. If the reference is not an allele in this context it will not be included.
- *
- * @return the set of alleles
- */
- public Set getAlleles() { return alleles; }
-
- /**
- * Gets the alternate alleles. This method should return all the alleles present at the location,
- * NOT including the reference allele. There are no constraints imposed on the ordering of alleles
- * in the set.
- *
- * @return the set of alternate alleles
- */
- public Set getAlternateAlleles() {
- HashSet altAlleles = new HashSet();
- for ( Allele allele : alleles ) {
- if ( !allele.equals(reference) )
- altAlleles.add(allele);
- }
- return altAlleles;
- }
-
- /**
- * Sets the given attribute
- *
- * @param key the attribute key
- * @param value the attribute value
- */
- public void setAttribute(Object key, Object value) {
- attributes.put(key, value);
- }
-
- /**
- * @param key the attribute key
- *
- * @return the attribute value for the given key (or null if not set)
- */
- public Object getAttribute(Object key) {
- return attributes.get(key);
- }
-
- /**
- * @return the attribute map
- */
- public Map getAttributes() {
- return attributes;
- }
-
-}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java
deleted file mode 100755
index ebdc4382d..000000000
--- a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-
-/**
- * @author ebanks
- *
- * Class Allele
- *
- * This class emcompasses all the basic information about an allele
- */
-public class Allele {
-
- private AlleleType type;
-
- private String bases;
-
-
- // the types of variants we currently allow
- public enum AlleleType {
- REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE, DELETION_REFERENCE
- }
-
- public Allele(AlleleType type, String bases) {
- this.type = type;
- if ( bases == null )
- throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
- if ( type == AlleleType.DELETION && bases.length() > 0 )
- throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
- if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
- throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
- this.bases = bases.toUpperCase();
- }
-
- /**
- * convenience method for switching over the allele type
- *
- * @return the AlleleType of this allele
- **/
- public AlleleType getType() { return type; }
-
- /**
- * convenience method for SNPs
- *
- * @return true if this is a SNP, false otherwise
- */
- public boolean isSNP() { return type == AlleleType.SNP; }
-
- /**
- * convenience method for variants
- *
- * @return true if this is a variant allele, false if it's reference
- */
- public boolean isVariant() { return type != AlleleType.REFERENCE; }
-
-
- /**
- * convenience method for indels
- *
- * @return true if this is an indel, false otherwise
- */
- public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
-
-
- /**
- * For deletions, this method returns an empty String.
- * For everything else, observed bases for the allele are returned.
- *
- * @return the bases, as a string
- */
- public String getBases() { return bases; }
-
- /**
- * @param other the other allele
- *
- * @return true if these alleles are equal
- */
- public boolean equals(Allele other) {
- return type == other.getType() && bases.equals(other.getBases());
- }
-
-}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java
index 589f02a1c..506087adf 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java
@@ -1,671 +1,671 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.CommandLineGATK;
-
-import java.io.*;
-import java.util.*;
-
-import net.sf.samtools.SAMFileHeader;
-
-/**
- * Created by IntelliJ IDEA.
- * User: chartl
- * Date: Jan 19, 2010
- * Time: 10:24:18 AM
- * To change this template use File | Settings | File Templates.
- */
-public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
- private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex",
- "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
- private final byte SNP_MAJOR_MODE = 1;
-
- private ArrayList variants;
- private PlinkVariantInfo currentVariant;
- private ListIterator variantIterator;
-
- private PlinkFileType plinkFileType;
-
- public enum PlinkFileType {
- STANDARD_PED,RAW_PED,BINARY_PED
- }
-
- // // // CONSTRUCTOR // // //
-
- public PlinkRod(String name) {
- super(name);
- }
-
- @Override
- public Object initialize(final File plinkFile) throws FileNotFoundException {
- if ( ! plinkFile.exists() ) {
- throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist.");
- }
-
- variants = parsePlinkFile(plinkFile);
- if ( variants != null ) {
- variantIterator = variants.listIterator();
- currentVariant = variantIterator.next();
- }
-
- assertNotNull();
-
- return null;
- }
-
- private void assertNotNull() {
- if ( currentVariant == null ) {
- throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" );
- }
- }
-
- @Override
- public boolean parseLine(Object obj, String[] args) {
- if ( variantIterator.hasNext() ) {
- currentVariant = variantIterator.next();
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public GenomeLoc getLocation() {
- return currentVariant.getLocation();
- }
-
- @Override
- public String toString() {
- return currentVariant == null ? "" : currentVariant.toString();
- }
-
- public String getVariantName() {
- return currentVariant.getName();
- }
-
- public ArrayList getVariantSampleNames() {
- return currentVariant.getSampleNames();
- }
-
- public ArrayList getGenotypes() {
- return currentVariant.getGenotypes();
- }
-
- public boolean variantIsSNP() {
- return currentVariant.isSNP();
- }
-
-
- // AM I PARSING A TEXT OR A BINARY FILE ??
-
- private ArrayList parsePlinkFile(File file) {
- String[] splitFileName = file.getName().split("\\.");
- String extension = splitFileName[splitFileName.length-1];
- if ( extension.equals("ped") || extension.equals("raw") ) {
- return parseTextFormattedPlinkFile(file);
- } else if ( extension.equals("bed") || extension.equals("bim") || extension.equals("fam") ) {
- plinkFileType = PlinkFileType.BINARY_PED;
- return parseBinaryFormattedPlinkFile(file);
- } else {
- System.out.println("Warning -- Plink file does not have a standard extension (ped/raw for text, bed/bim/fam for binary) -- assuming ped format");
- return parseTextFormattedPlinkFile(file);
- }
-
- }
-
- /* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***
- * * PARSING STANDARD TEXT PED FILES * **
- * *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/
-
- private ArrayList parseTextFormattedPlinkFile( File file ) {
- try {
- BufferedReader reader = new BufferedReader( new FileReader ( file ) );
- String header = reader.readLine();
- ArrayList seqVars = instantiateVariantListFromHeader(header);
- ArrayList snpOffsets = getSNPOffsetsFromHeader(header);
-
- String line = null;
- do {
- line = reader.readLine();
- incorporateInfo(seqVars,snpOffsets,line);
- } while ( line != null );
-
-
- java.util.Collections.sort(seqVars); // because the comparable uses the GenomeLoc comparable; these
- // are sorted in standard reference order
-
- return seqVars;
-
- } catch ( FileNotFoundException e ) {
- throw new StingException("File "+file.getAbsolutePath()+" could not be found. This was checked earlier. Should never happen.",e);
- } catch ( IOException e ) {
- throw new StingException("Error reading file "+file.getAbsolutePath()+".",e);
- }
- }
-
- private void incorporateInfo(List vars, List offsets, String plinkLine) {
- if ( plinkLine == null ) {
- return;
- }
-
- String[] plinkInfo;
- if ( plinkFileType == PlinkFileType.STANDARD_PED) {
- plinkInfo = plinkLine.split("\t");
- } else {
- throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
- }
-
- String individualName = plinkInfo[1];
-
- int snpNumber = 0;
-
- if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
- for ( int i : offsets ) {
- vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName);
- snpNumber++;
- }
- }
- }
-
- private ArrayList instantiateVariantListFromHeader(String header) {
- if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing
- plinkFileType = PlinkFileType.STANDARD_PED;
- } else {// first line is the raw header; comes from de-binary-ing a .bed file
- plinkFileType = PlinkFileType.RAW_PED;
- throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
- }
-
- ArrayList seqVars = new ArrayList();
- String[] headerFields;
-
- if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
- headerFields = header.split("\t");
- } else {
- throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
- }
-
- for ( String field : headerFields ) {
- if ( ! headerEntries.contains(field) ) {
- // not a standard header, so a variant
- seqVars.add(new PlinkVariantInfo(field));
- }
- }
-
- return seqVars;
- }
-
- private ArrayList getSNPOffsetsFromHeader(String header) {
- ArrayList offsets = new ArrayList();
- String[] headerFields;
- if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
- headerFields = header.split("\t+");
- } else {
- headerFields = header.split("\\s+");
- }
-
- int offset = 0;
-
- for ( String field : headerFields ) {
- if ( ! headerEntries.contains(field) ) {
- offsets.add(offset);
- }
- offset++;
- }
-
- return offsets;
- }
-
- /* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
- * * PARSING BINARY PLINK BED/BIM/FAM FILES * *
- * *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/
-
- private ArrayList parseBinaryFormattedPlinkFile(File file) {
- PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file);
- ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile);
- ArrayList sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile);
- ArrayList updatedVariants = getGenotypesFromBedFile(parsedVariants,sampleNames,binaryFiles.bedFile);
-
- java.util.Collections.sort(updatedVariants);
-
- return updatedVariants;
- }
-
- private PlinkBinaryTrifecta getPlinkBinaryTriplet(File file) {
- // just gonna parse the name
- PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta();
- String absolute_path = file.getAbsolutePath();
- String[] directory_tree = absolute_path.split("/");
- String file_name = directory_tree[directory_tree.length-1].split("\\.")[0];
- StringBuilder pathBuilder = new StringBuilder();
- for ( int i = 0; i < directory_tree.length - 1; i ++ ) {
- pathBuilder.append(String.format("%s/",directory_tree[i]));
- }
- String path = pathBuilder.toString();
- trifecta.bedFile = new File(path+file_name+".bed");
- trifecta.bimFile = new File(path+file_name+".bim");
- trifecta.famFile = new File(path+file_name+".fam");
-
- return trifecta;
-
- }
-
- private ArrayList instantiateVariantsFromBimFile(File bimFile) {
- BufferedReader reader;
- try {
- reader = new BufferedReader( new FileReader( bimFile ));
- } catch ( FileNotFoundException e) {
- throw new StingException("The SNP information file accompanying the binary ped file was not found (the .bim file). "+
- "Please ensure that it is in the same directory as the .bed and .fam files. The file we "+
- "Were looking for was "+bimFile.getAbsolutePath(),e);
- }
-
- ArrayList variants = new ArrayList();
-
- try {
- String line = null;
- do {
- line = reader.readLine();
- if ( line != null ) {
- String[] snpInfo = line.split("\\s+");
- PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true);
- variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
- variant.setAlleles(snpInfo[4],snpInfo[5]);
- variants.add(variant);
- }
- } while ( line != null );
- } catch ( IOException e ) {
- throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e);
- }
-
- return variants;
- }
-
- private ArrayList getSampleNameOrderingFromFamFile(File famFile) {
- BufferedReader reader;
- try {
- reader = new BufferedReader( new FileReader( famFile ));
- } catch ( FileNotFoundException e) {
- throw new StingException("The Family information file accompanying the binary ped file was not found (the .fam file). "+
- "Please ensure that it is in the same directory as the .bed and .bim files. The file we "+
- "Were looking for was "+famFile.getAbsolutePath(),e);
- }
-
- ArrayList sampleNames = new ArrayList();
-
- try {
- String line = null;
- do {
- line = reader.readLine();
- if ( line != null ) {
- sampleNames.add(line.split("\\s+")[1]);
- }
- } while ( line != null );
- } catch (IOException e) {
- throw new StingException("There was an error reading the .fam file "+famFile.getAbsolutePath(),e);
- }
-
- return sampleNames;
- }
-
- private ArrayList getGenotypesFromBedFile(ArrayList variants, ArrayList samples, File bedFile) {
- FileInputStream inStream;
- try {
- inStream = new FileInputStream(bedFile);
- } catch (FileNotFoundException e) {
- throw new StingException("The Binary pedigree file file accompanying the family file was not found (the .bed file). "+
- "Please ensure that it is in the same directory as the .bim and .fam files. The file we "+
- "Were looking for was "+bedFile.getAbsolutePath(),e);
- }
-
- try {
- byte genotype = -1;
- long bytesRead = 0;
- int snpOffset = 0;
- int sampleOffset = 0;
- boolean snpMajorMode = true;
- do {
- genotype = (byte) inStream.read();
- bytesRead++;
- if ( genotype != -1 ) {
- if ( bytesRead > 3 ) {
- addGenotypeByte(genotype,variants,samples,snpOffset,sampleOffset, snpMajorMode);
-
- if ( snpMajorMode ) {
- sampleOffset = sampleOffset + 4;
- if ( sampleOffset > samples.size() -1 ) {
- snpOffset ++;
- sampleOffset = 0;
- }
- } else {
- snpOffset = snpOffset + 4;
- if ( snpOffset > variants.size() -1 ) {
- sampleOffset ++;
- snpOffset = 0;
- }
- }
-
- } else {
- if ( bytesRead == 3) {
- snpMajorMode = genotype == SNP_MAJOR_MODE;
- }
- }
- }
- } while ( genotype != -1 );
- } catch ( IOException e) {
- throw new StingException("Error reading binary ped file "+bedFile.getAbsolutePath(), e);
- }
-
- return variants;
- }
-
- private void addGenotypeByte(byte genotype, ArrayList variants, ArrayList sampleNames, int snpOffset, int sampleOffset, boolean major) {
- // four genotypes encoded in this byte
- int[] genotypes = parseGenotypes(genotype);
- for ( int g : genotypes ) {
- variants.get(snpOffset).addBinaryGenotypeEntry(g,sampleNames.get(sampleOffset));
-
- if ( major ) {
- sampleOffset++;
- if ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
- return;
- }
- } else {
- snpOffset++;
- if( snpOffset > variants.size()-1 ) {
- return;
- }
- }
- }
- }
-
- private int[] parseGenotypes(byte genotype) {
- int[] genotypes = new int[4];
- genotypes[0] = ( genotype & 3 );
- genotypes[1] = ( ( genotype & 12 ) >>> 2 );
- genotypes[2] = ( ( genotype & 48 ) >>> 4 );
- genotypes[3] = ( ( genotype & 192 ) >>> 6 );
- return genotypes;
- }
-}
-
-class PlinkVariantInfo implements Comparable {
- private String variantName;
- private GenomeLoc loc;
- private ArrayList genotypes = new ArrayList();
- private ArrayList sampleNames = new ArrayList();
-
- private ArrayList indelHolder;
- private ArrayList sampleHolder;
- private int siteIndelLength;
- private Allele.AlleleType indelType;
-
- // for binary parsing
-
- private String locAllele1;
- private String locAllele2;
-
- public GenomeLoc getLocation() {
- return loc;
- }
-
- public String getName() {
- return variantName;
- }
-
- public ArrayList getSampleNames() {
- return sampleNames;
- }
-
- public ArrayList getGenotypes() {
- return genotypes;
- }
-
- public boolean isSNP() {
- return this.indelType == null;
- }
-
- public void setGenomeLoc(GenomeLoc loc) {
- this.loc = loc;
- }
-
- public void setAlleles(String al1, String al2) {
- if ( al1.equals("0") ) {
- // encoding for a site at which no variants were detected
- locAllele1 = al2;
- } else {
- locAllele1 = al1;
- }
- locAllele2 = al2;
- if ( ! isSNP() ) {
- siteIndelLength = Math.max(locAllele1.length(),locAllele2.length());
- }
-
- }
-
- // CONSTRUCTOR
-
- public PlinkVariantInfo(String variantName) {
- this.variantName = variantName;
- parseName();
- }
-
- public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) {
- this.variantName = variantName;
- if ( onlyLookForIndelInfo ) {
- parseNameForIndels();
- } else {
- parseName();
- }
- }
-
- private void parseName() {
- String chrom = this.variantName.split("\\|c")[1].split("_")[0];
- String pos = this.variantName.split("_p")[1].split("_")[0];
- this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
- this.parseNameForIndels();
- }
-
- private void parseNameForIndels() {
- if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) {
- this.instantiateIndel();
- }
- }
-
- private void instantiateIndel() {
- this.indelHolder = new ArrayList();
- this.sampleHolder = new ArrayList();
- this.siteIndelLength = -1;
- this.indelType = this.variantName.contains("_gI") ? Allele.AlleleType.INSERTION : Allele.AlleleType.DELETION;
- }
-
- public void addGenotypeEntry(String genotypeString, String sampleName) {
- // identify if we're dealing with a deletion
- if ( this.isSNP() ) {
- this.addSNP(genotypeString.split("\\s+"),sampleName);
- } else {
- this.addIndel(genotypeString.split("\\s+"),sampleName);
- }
- }
-
- public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) {
- String[] alleleStr = new String[2];
- if ( genoTYPE == 0 ) {
- alleleStr[0] = locAllele1;
- alleleStr[1] = locAllele1;
- } else if (genoTYPE == 2) {
- alleleStr[0] = locAllele1;
- alleleStr[1] = locAllele2;
- } else if (genoTYPE == 3 ) {
- alleleStr[0] = locAllele2;
- alleleStr[1] = locAllele2;
- } else {
- alleleStr[0] = "0";
- alleleStr[1] = "0";
- }
-
- if ( this.isSNP() ) {
- this.addSNP(alleleStr,sampleName);
- } else {
- this.addIndel(alleleStr,sampleName);
- }
- }
-
- private void addSNP(String[] alleleStrings, String sampleName) {
- ArrayList alleles = new ArrayList(2);
-
- for ( String alStr : alleleStrings ) {
- alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE,alStr));
- }
-
- genotypes.add(new Genotype(alleles,sampleName,20.0) );
- sampleNames.add(sampleName);
- }
-
- private void addIndel(String[] alleleStrings, String sampleName) {
- String alleleStr1 = alleleStrings[0];
- String alleleStr2 = alleleStrings[1];
- if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) {
- // heterozygous indel
- if ( alleleStr1.contains("-") ) {
- this.addHetIndel(alleleStr2,sampleName) ;
- } else {
- this.addHetIndel(alleleStr1,sampleName);
- }
- } else {
- this.addHomIndel(alleleStr1, alleleStr2, sampleName);
- }
- }
-
- private void addHetIndel(String baseStr, String sampleName) {
- Allele ref;
- Allele alt;
-
- if ( indelType == Allele.AlleleType.INSERTION ) {
- ref = new Allele(Allele.AlleleType.REFERENCE,"");
- alt = new Allele(indelType,baseStr);
- } else {
- alt = new Allele(indelType,"");
- ref = new Allele(Allele.AlleleType.DELETION_REFERENCE,baseStr);
- }
-
- this.setIndelLength(alt,baseStr.length());
-
- if ( ! indelHolder.isEmpty() ) {
- siteIndelLength = baseStr.length();
- this.addHeldIndels();
- }
-
- Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0);
- this.setIndelGenotypeLength(indel,siteIndelLength);
- this.genotypes.add(indel);
- this.sampleNames.add(sampleName);
- }
-
- private void addHomIndel(String strand1, String strand2, String sampleName) {
- Allele allele1;
- Allele allele2;
- boolean reference;
- if ( indelType == Allele.AlleleType.DELETION ) {
- if ( strand1.contains("-") ) {
- // homozygous deletion
- allele1 = new Allele(indelType,"");
- allele2 = new Allele(indelType,"");
- reference = false;
- } else {
- allele1 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand1);
- allele2 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand2);
- reference = true;
- }
- } else {
- if ( strand1.contains("-") ) {
- // homozygous reference
- allele1 = new Allele(Allele.AlleleType.REFERENCE,"");
- allele2 = new Allele(Allele.AlleleType.REFERENCE,"");
- reference = true;
- } else {
- allele1 = new Allele(indelType,strand1);
- allele2 = new Allele(indelType,strand2);
- reference = false;
- }
- }
-
- if ( reference ) {
- if ( ! indelHolder.isEmpty() ) {
- siteIndelLength = strand1.length();
- this.addHeldIndels();
- }
- }
-
- if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site
- Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
- setIndelGenotypeLength(gen,siteIndelLength);
- this.genotypes.add(gen);
- this.sampleNames.add(sampleName);
- } else { // hold on the variants until we *do* know the in/del length at this site
- this.indelHolder.add(allele1);
- this.indelHolder.add(allele2);
- this.sampleHolder.add(sampleName);
- }
-
- }
-
- private void setIndelGenotypeLength(Genotype g, int length) {
- g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
- }
-
- private void addHeldIndels() {
- Allele del1;
- Allele del2;
- int startingSize = indelHolder.size();
- for ( int i = 0; i < startingSize ; i+=2 ) {
- del1 = indelHolder.get(i);
- del2 = indelHolder.get(i+1);
- this.addIndelFromCache(del1,del2,sampleHolder.get(i/2));
- if ( indelHolder.size() != startingSize ) {
- throw new StingException("Halting algorithm -- possible infinite loop");
- }
- }
- indelHolder.clear();
- sampleHolder.clear();
- }
-
- private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) {
- this.setIndelLength(indel1,siteIndelLength);
- this.setIndelLength(indel2,siteIndelLength);
- Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0);
- this.setIndelGenotypeLength(indel,siteIndelLength);
- this.genotypes.add(indel);
- this.sampleNames.add(sampleName);
- }
-
- public int compareTo(Object obj) {
- if ( ! ( obj instanceof PlinkVariantInfo) ) {
- return 1;
- }
-
- return loc.compareTo(((PlinkVariantInfo) obj).getLocation());
- }
-
- private void setIndelLength(Allele al, int length) {
- // Todo -- once alleles support deletion lengths add that information
- // Todo -- into the object; for now this can just return
- return;
- }
-}
-
-class PlinkBinaryTrifecta {
-
- public PlinkBinaryTrifecta() {
-
- }
-
- public File bedFile;
- public File bimFile;
- public File famFile;
-
-}
\ No newline at end of file
+//package org.broadinstitute.sting.gatk.refdata;
+//
+//import org.broadinstitute.sting.utils.BaseUtils;
+//import org.broadinstitute.sting.utils.GenomeLoc;
+//import org.broadinstitute.sting.utils.GenomeLocParser;
+//import org.broadinstitute.sting.utils.StingException;
+//import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+//import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+//import org.broadinstitute.sting.gatk.CommandLineGATK;
+//
+//import java.io.*;
+//import java.util.*;
+//
+//import net.sf.samtools.SAMFileHeader;
+//
+///**
+// * Created by IntelliJ IDEA.
+// * User: chartl
+// * Date: Jan 19, 2010
+// * Time: 10:24:18 AM
+// * To change this template use File | Settings | File Templates.
+// */
+//public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
+// private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex",
+// "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
+// private final byte SNP_MAJOR_MODE = 1;
+//
+// private ArrayList variants;
+// private PlinkVariantInfo currentVariant;
+// private ListIterator variantIterator;
+//
+// private PlinkFileType plinkFileType;
+//
+// public enum PlinkFileType {
+// STANDARD_PED,RAW_PED,BINARY_PED
+// }
+//
+// // // // CONSTRUCTOR // // //
+//
+// public PlinkRod(String name) {
+// super(name);
+// }
+//
+// @Override
+// public Object initialize(final File plinkFile) throws FileNotFoundException {
+// if ( ! plinkFile.exists() ) {
+// throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist.");
+// }
+//
+// variants = parsePlinkFile(plinkFile);
+// if ( variants != null ) {
+// variantIterator = variants.listIterator();
+// currentVariant = variantIterator.next();
+// }
+//
+// assertNotNull();
+//
+// return null;
+// }
+//
+// private void assertNotNull() {
+// if ( currentVariant == null ) {
+// throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" );
+// }
+// }
+//
+// @Override
+// public boolean parseLine(Object obj, String[] args) {
+// if ( variantIterator.hasNext() ) {
+// currentVariant = variantIterator.next();
+// return true;
+// } else {
+// return false;
+// }
+// }
+//
+// @Override
+// public GenomeLoc getLocation() {
+// return currentVariant.getLocation();
+// }
+//
+// @Override
+// public String toString() {
+// return currentVariant == null ? "" : currentVariant.toString();
+// }
+//
+// public String getVariantName() {
+// return currentVariant.getName();
+// }
+//
+// public ArrayList getVariantSampleNames() {
+// return currentVariant.getSampleNames();
+// }
+//
+// public ArrayList getGenotypes() {
+// return currentVariant.getGenotypes();
+// }
+//
+// public boolean variantIsSNP() {
+// return currentVariant.isSNP();
+// }
+//
+//
+// // AM I PARSING A TEXT OR A BINARY FILE ??
+//
+// private ArrayList parsePlinkFile(File file) {
+// String[] splitFileName = file.getName().split("\\.");
+// String extension = splitFileName[splitFileName.length-1];
+// if ( extension.equals("ped") || extension.equals("raw") ) {
+// return parseTextFormattedPlinkFile(file);
+// } else if ( extension.equals("bed") || extension.equals("bim") || extension.equals("fam") ) {
+// plinkFileType = PlinkFileType.BINARY_PED;
+// return parseBinaryFormattedPlinkFile(file);
+// } else {
+// System.out.println("Warning -- Plink file does not have a standard extension (ped/raw for text, bed/bim/fam for binary) -- assuming ped format");
+// return parseTextFormattedPlinkFile(file);
+// }
+//
+// }
+//
+// /* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***
+// * * PARSING STANDARD TEXT PED FILES * **
+// * *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/
+//
+// private ArrayList parseTextFormattedPlinkFile( File file ) {
+// try {
+// BufferedReader reader = new BufferedReader( new FileReader ( file ) );
+// String header = reader.readLine();
+// ArrayList seqVars = instantiateVariantListFromHeader(header);
+// ArrayList snpOffsets = getSNPOffsetsFromHeader(header);
+//
+// String line = null;
+// do {
+// line = reader.readLine();
+// incorporateInfo(seqVars,snpOffsets,line);
+// } while ( line != null );
+//
+//
+// java.util.Collections.sort(seqVars); // because the comparable uses the GenomeLoc comparable; these
+// // are sorted in standard reference order
+//
+// return seqVars;
+//
+// } catch ( FileNotFoundException e ) {
+// throw new StingException("File "+file.getAbsolutePath()+" could not be found. This was checked earlier. Should never happen.",e);
+// } catch ( IOException e ) {
+// throw new StingException("Error reading file "+file.getAbsolutePath()+".",e);
+// }
+// }
+//
+// private void incorporateInfo(List vars, List offsets, String plinkLine) {
+// if ( plinkLine == null ) {
+// return;
+// }
+//
+// String[] plinkInfo;
+// if ( plinkFileType == PlinkFileType.STANDARD_PED) {
+// plinkInfo = plinkLine.split("\t");
+// } else {
+// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
+// }
+//
+// String individualName = plinkInfo[1];
+//
+// int snpNumber = 0;
+//
+// if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
+// for ( int i : offsets ) {
+// vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName);
+// snpNumber++;
+// }
+// }
+// }
+//
+// private ArrayList instantiateVariantListFromHeader(String header) {
+// if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing
+// plinkFileType = PlinkFileType.STANDARD_PED;
+// } else {// first line is the raw header; comes from de-binary-ing a .bed file
+// plinkFileType = PlinkFileType.RAW_PED;
+// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
+// }
+//
+// ArrayList seqVars = new ArrayList();
+// String[] headerFields;
+//
+// if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
+// headerFields = header.split("\t");
+// } else {
+// throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
+// }
+//
+// for ( String field : headerFields ) {
+// if ( ! headerEntries.contains(field) ) {
+// // not a standard header, so a variant
+// seqVars.add(new PlinkVariantInfo(field));
+// }
+// }
+//
+// return seqVars;
+// }
+//
+// private ArrayList getSNPOffsetsFromHeader(String header) {
+// ArrayList offsets = new ArrayList();
+// String[] headerFields;
+// if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
+// headerFields = header.split("\t+");
+// } else {
+// headerFields = header.split("\\s+");
+// }
+//
+// int offset = 0;
+//
+// for ( String field : headerFields ) {
+// if ( ! headerEntries.contains(field) ) {
+// offsets.add(offset);
+// }
+// offset++;
+// }
+//
+// return offsets;
+// }
+//
+// /* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
+// * * PARSING BINARY PLINK BED/BIM/FAM FILES * *
+// * *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/
+//
+// private ArrayList parseBinaryFormattedPlinkFile(File file) {
+// PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file);
+// ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile);
+// ArrayList sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile);
+// ArrayList updatedVariants = getGenotypesFromBedFile(parsedVariants,sampleNames,binaryFiles.bedFile);
+//
+// java.util.Collections.sort(updatedVariants);
+//
+// return updatedVariants;
+// }
+//
+// private PlinkBinaryTrifecta getPlinkBinaryTriplet(File file) {
+// // just gonna parse the name
+// PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta();
+// String absolute_path = file.getAbsolutePath();
+// String[] directory_tree = absolute_path.split("/");
+// String file_name = directory_tree[directory_tree.length-1].split("\\.")[0];
+// StringBuilder pathBuilder = new StringBuilder();
+// for ( int i = 0; i < directory_tree.length - 1; i ++ ) {
+// pathBuilder.append(String.format("%s/",directory_tree[i]));
+// }
+// String path = pathBuilder.toString();
+// trifecta.bedFile = new File(path+file_name+".bed");
+// trifecta.bimFile = new File(path+file_name+".bim");
+// trifecta.famFile = new File(path+file_name+".fam");
+//
+// return trifecta;
+//
+// }
+//
+// private ArrayList instantiateVariantsFromBimFile(File bimFile) {
+// BufferedReader reader;
+// try {
+// reader = new BufferedReader( new FileReader( bimFile ));
+// } catch ( FileNotFoundException e) {
+// throw new StingException("The SNP information file accompanying the binary ped file was not found (the .bim file). "+
+// "Please ensure that it is in the same directory as the .bed and .fam files. The file we "+
+// "Were looking for was "+bimFile.getAbsolutePath(),e);
+// }
+//
+// ArrayList variants = new ArrayList();
+//
+// try {
+// String line = null;
+// do {
+// line = reader.readLine();
+// if ( line != null ) {
+// String[] snpInfo = line.split("\\s+");
+// PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true);
+// variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
+// variant.setAlleles(snpInfo[4],snpInfo[5]);
+// variants.add(variant);
+// }
+// } while ( line != null );
+// } catch ( IOException e ) {
+// throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e);
+// }
+//
+// return variants;
+// }
+//
+// private ArrayList getSampleNameOrderingFromFamFile(File famFile) {
+// BufferedReader reader;
+// try {
+// reader = new BufferedReader( new FileReader( famFile ));
+// } catch ( FileNotFoundException e) {
+// throw new StingException("The Family information file accompanying the binary ped file was not found (the .fam file). "+
+// "Please ensure that it is in the same directory as the .bed and .bim files. The file we "+
+// "Were looking for was "+famFile.getAbsolutePath(),e);
+// }
+//
+// ArrayList sampleNames = new ArrayList();
+//
+// try {
+// String line = null;
+// do {
+// line = reader.readLine();
+// if ( line != null ) {
+// sampleNames.add(line.split("\\s+")[1]);
+// }
+// } while ( line != null );
+// } catch (IOException e) {
+// throw new StingException("There was an error reading the .fam file "+famFile.getAbsolutePath(),e);
+// }
+//
+// return sampleNames;
+// }
+//
+// private ArrayList getGenotypesFromBedFile(ArrayList variants, ArrayList samples, File bedFile) {
+// FileInputStream inStream;
+// try {
+// inStream = new FileInputStream(bedFile);
+// } catch (FileNotFoundException e) {
+// throw new StingException("The Binary pedigree file file accompanying the family file was not found (the .bed file). "+
+// "Please ensure that it is in the same directory as the .bim and .fam files. The file we "+
+// "Were looking for was "+bedFile.getAbsolutePath(),e);
+// }
+//
+// try {
+// byte genotype = -1;
+// long bytesRead = 0;
+// int snpOffset = 0;
+// int sampleOffset = 0;
+// boolean snpMajorMode = true;
+// do {
+// genotype = (byte) inStream.read();
+// bytesRead++;
+// if ( genotype != -1 ) {
+// if ( bytesRead > 3 ) {
+// addGenotypeByte(genotype,variants,samples,snpOffset,sampleOffset, snpMajorMode);
+//
+// if ( snpMajorMode ) {
+// sampleOffset = sampleOffset + 4;
+// if ( sampleOffset > samples.size() -1 ) {
+// snpOffset ++;
+// sampleOffset = 0;
+// }
+// } else {
+// snpOffset = snpOffset + 4;
+// if ( snpOffset > variants.size() -1 ) {
+// sampleOffset ++;
+// snpOffset = 0;
+// }
+// }
+//
+// } else {
+// if ( bytesRead == 3) {
+// snpMajorMode = genotype == SNP_MAJOR_MODE;
+// }
+// }
+// }
+// } while ( genotype != -1 );
+// } catch ( IOException e) {
+// throw new StingException("Error reading binary ped file "+bedFile.getAbsolutePath(), e);
+// }
+//
+// return variants;
+// }
+//
+// private void addGenotypeByte(byte genotype, ArrayList variants, ArrayList sampleNames, int snpOffset, int sampleOffset, boolean major) {
+// // four genotypes encoded in this byte
+// int[] genotypes = parseGenotypes(genotype);
+// for ( int g : genotypes ) {
+// variants.get(snpOffset).addBinaryGenotypeEntry(g,sampleNames.get(sampleOffset));
+//
+// if ( major ) {
+// sampleOffset++;
+// if ( sampleOffset > sampleNames.size()-1 ) { //using offsets for comparison; size 5 == offset 4
+// return;
+// }
+// } else {
+// snpOffset++;
+// if( snpOffset > variants.size()-1 ) {
+// return;
+// }
+// }
+// }
+// }
+//
+// private int[] parseGenotypes(byte genotype) {
+// int[] genotypes = new int[4];
+// genotypes[0] = ( genotype & 3 );
+// genotypes[1] = ( ( genotype & 12 ) >>> 2 );
+// genotypes[2] = ( ( genotype & 48 ) >>> 4 );
+// genotypes[3] = ( ( genotype & 192 ) >>> 6 );
+// return genotypes;
+// }
+//}
+//
+//class PlinkVariantInfo implements Comparable {
+// private String variantName;
+// private GenomeLoc loc;
+// private ArrayList genotypes = new ArrayList();
+// private ArrayList sampleNames = new ArrayList();
+//
+// private ArrayList indelHolder;
+// private ArrayList sampleHolder;
+// private int siteIndelLength;
+// private Allele.AlleleType indelType;
+//
+// // for binary parsing
+//
+// private String locAllele1;
+// private String locAllele2;
+//
+// public GenomeLoc getLocation() {
+// return loc;
+// }
+//
+// public String getName() {
+// return variantName;
+// }
+//
+// public ArrayList getSampleNames() {
+// return sampleNames;
+// }
+//
+// public ArrayList getGenotypes() {
+// return genotypes;
+// }
+//
+// public boolean isSNP() {
+// return this.indelType == null;
+// }
+//
+// public void setGenomeLoc(GenomeLoc loc) {
+// this.loc = loc;
+// }
+//
+// public void setAlleles(String al1, String al2) {
+// if ( al1.equals("0") ) {
+// // encoding for a site at which no variants were detected
+// locAllele1 = al2;
+// } else {
+// locAllele1 = al1;
+// }
+// locAllele2 = al2;
+// if ( ! isSNP() ) {
+// siteIndelLength = Math.max(locAllele1.length(),locAllele2.length());
+// }
+//
+// }
+//
+// // CONSTRUCTOR
+//
+// public PlinkVariantInfo(String variantName) {
+// this.variantName = variantName;
+// parseName();
+// }
+//
+// public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) {
+// this.variantName = variantName;
+// if ( onlyLookForIndelInfo ) {
+// parseNameForIndels();
+// } else {
+// parseName();
+// }
+// }
+//
+// private void parseName() {
+// String chrom = this.variantName.split("\\|c")[1].split("_")[0];
+// String pos = this.variantName.split("_p")[1].split("_")[0];
+// this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
+// this.parseNameForIndels();
+// }
+//
+// private void parseNameForIndels() {
+// if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) {
+// this.instantiateIndel();
+// }
+// }
+//
+// private void instantiateIndel() {
+// this.indelHolder = new ArrayList();
+// this.sampleHolder = new ArrayList();
+// this.siteIndelLength = -1;
+// this.indelType = this.variantName.contains("_gI") ? Allele.AlleleType.INSERTION : Allele.AlleleType.DELETION;
+// }
+//
+// public void addGenotypeEntry(String genotypeString, String sampleName) {
+// // identify if we're dealing with a deletion
+// if ( this.isSNP() ) {
+// this.addSNP(genotypeString.split("\\s+"),sampleName);
+// } else {
+// this.addIndel(genotypeString.split("\\s+"),sampleName);
+// }
+// }
+//
+// public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) {
+// String[] alleleStr = new String[2];
+// if ( genoTYPE == 0 ) {
+// alleleStr[0] = locAllele1;
+// alleleStr[1] = locAllele1;
+// } else if (genoTYPE == 2) {
+// alleleStr[0] = locAllele1;
+// alleleStr[1] = locAllele2;
+// } else if (genoTYPE == 3 ) {
+// alleleStr[0] = locAllele2;
+// alleleStr[1] = locAllele2;
+// } else {
+// alleleStr[0] = "0";
+// alleleStr[1] = "0";
+// }
+//
+// if ( this.isSNP() ) {
+// this.addSNP(alleleStr,sampleName);
+// } else {
+// this.addIndel(alleleStr,sampleName);
+// }
+// }
+//
+// private void addSNP(String[] alleleStrings, String sampleName) {
+// ArrayList alleles = new ArrayList(2);
+//
+// for ( String alStr : alleleStrings ) {
+// alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE,alStr));
+// }
+//
+// genotypes.add(new Genotype(alleles,sampleName,20.0) );
+// sampleNames.add(sampleName);
+// }
+//
+// private void addIndel(String[] alleleStrings, String sampleName) {
+// String alleleStr1 = alleleStrings[0];
+// String alleleStr2 = alleleStrings[1];
+// if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) {
+// // heterozygous indel
+// if ( alleleStr1.contains("-") ) {
+// this.addHetIndel(alleleStr2,sampleName) ;
+// } else {
+// this.addHetIndel(alleleStr1,sampleName);
+// }
+// } else {
+// this.addHomIndel(alleleStr1, alleleStr2, sampleName);
+// }
+// }
+//
+// private void addHetIndel(String baseStr, String sampleName) {
+// Allele ref;
+// Allele alt;
+//
+// if ( indelType == Allele.AlleleType.INSERTION ) {
+// ref = new Allele(Allele.AlleleType.REFERENCE,"");
+// alt = new Allele(indelType,baseStr);
+// } else {
+// alt = new Allele(indelType,"");
+// ref = new Allele(Allele.AlleleType.DELETION_REFERENCE,baseStr);
+// }
+//
+// this.setIndelLength(alt,baseStr.length());
+//
+// if ( ! indelHolder.isEmpty() ) {
+// siteIndelLength = baseStr.length();
+// this.addHeldIndels();
+// }
+//
+// Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0);
+// this.setIndelGenotypeLength(indel,siteIndelLength);
+// this.genotypes.add(indel);
+// this.sampleNames.add(sampleName);
+// }
+//
+// private void addHomIndel(String strand1, String strand2, String sampleName) {
+// Allele allele1;
+// Allele allele2;
+// boolean reference;
+// if ( indelType == Allele.AlleleType.DELETION ) {
+// if ( strand1.contains("-") ) {
+// // homozygous deletion
+// allele1 = new Allele(indelType,"");
+// allele2 = new Allele(indelType,"");
+// reference = false;
+// } else {
+// allele1 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand1);
+// allele2 = new Allele(Allele.AlleleType.DELETION_REFERENCE,strand2);
+// reference = true;
+// }
+// } else {
+// if ( strand1.contains("-") ) {
+// // homozygous reference
+// allele1 = new Allele(Allele.AlleleType.REFERENCE,"");
+// allele2 = new Allele(Allele.AlleleType.REFERENCE,"");
+// reference = true;
+// } else {
+// allele1 = new Allele(indelType,strand1);
+// allele2 = new Allele(indelType,strand2);
+// reference = false;
+// }
+// }
+//
+// if ( reference ) {
+// if ( ! indelHolder.isEmpty() ) {
+// siteIndelLength = strand1.length();
+// this.addHeldIndels();
+// }
+// }
+//
+// if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site
+// Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
+// setIndelGenotypeLength(gen,siteIndelLength);
+// this.genotypes.add(gen);
+// this.sampleNames.add(sampleName);
+// } else { // hold on the variants until we *do* know the in/del length at this site
+// this.indelHolder.add(allele1);
+// this.indelHolder.add(allele2);
+// this.sampleHolder.add(sampleName);
+// }
+//
+// }
+//
+// private void setIndelGenotypeLength(Genotype g, int length) {
+// g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
+// }
+//
+// private void addHeldIndels() {
+// Allele del1;
+// Allele del2;
+// int startingSize = indelHolder.size();
+// for ( int i = 0; i < startingSize ; i+=2 ) {
+// del1 = indelHolder.get(i);
+// del2 = indelHolder.get(i+1);
+// this.addIndelFromCache(del1,del2,sampleHolder.get(i/2));
+// if ( indelHolder.size() != startingSize ) {
+// throw new StingException("Halting algorithm -- possible infinite loop");
+// }
+// }
+// indelHolder.clear();
+// sampleHolder.clear();
+// }
+//
+// private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) {
+// this.setIndelLength(indel1,siteIndelLength);
+// this.setIndelLength(indel2,siteIndelLength);
+// Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0);
+// this.setIndelGenotypeLength(indel,siteIndelLength);
+// this.genotypes.add(indel);
+// this.sampleNames.add(sampleName);
+// }
+//
+// public int compareTo(Object obj) {
+// if ( ! ( obj instanceof PlinkVariantInfo) ) {
+// return 1;
+// }
+//
+// return loc.compareTo(((PlinkVariantInfo) obj).getLocation());
+// }
+//
+// private void setIndelLength(Allele al, int length) {
+// // Todo -- once alleles support deletion lengths add that information
+// // Todo -- into the object; for now this can just return
+// return;
+// }
+//}
+//
+//class PlinkBinaryTrifecta {
+//
+// public PlinkBinaryTrifecta() {
+//
+// }
+//
+// public File bedFile;
+// public File bimFile;
+// public File famFile;
+//
+//}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java
index 07707525a..5d55a6625 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java
@@ -60,11 +60,11 @@ public class PileupWalker extends LocusWalker implements TreeR
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
public boolean alwaysShowSecondBase = false;
- @Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
- public boolean qualsAsInts = false;
+ //@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
+ //public boolean qualsAsInts = false;
- @Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
- public boolean IGNORE_UNCOVERED_BASES = false;
+ //@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
+ //public boolean IGNORE_UNCOVERED_BASES = false;
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
public boolean SHOW_INDEL_PILEUPS = false;
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java
new file mode 100755
index 000000000..c2e093e46
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java
@@ -0,0 +1,126 @@
+package org.broadinstitute.sting.oneoffprojects.variantcontext;
+
+import org.broadinstitute.sting.utils.BaseUtils;
+
+import java.util.Arrays;
+
+/**
+ * @author ebanks, depristo
+ * Types of alleles:
+ *
+ * Ref: a t C g a // C is the reference base
+ *
+ * : a t G g a // C base is a G in some individuals
+ *
+ * : a t - g a // C base is deleted w.r.t. the reference
+ *
+ * : a t CAg a // A base is inserted w.r.t. the reference sequence
+ *
+ * In these cases, where are the alleles?
+ *
+ * SNP polymorphism of C/G -> { C , G } -> C is the reference allele
+ * 1 base deletion of C -> { C , - } -> C is the reference allele
+ * 1 base insertion of A -> { - ; A } -> NULL is the reference allele
+ *
+ * Suppose I see a the following in the population:
+ *
+ * Ref: a t C g a // C is the reference base
+ * : a t G g a // C base is a G in some individuals
+ * : a t - g a // C base is deleted w.r.t. the reference
+ *
+ * How do I represent this? There are three segregating alleles:
+ *
+ * { C , G , - }
+ *
+ * Now suppose I have this more complex example:
+ *
+ * Ref: a t C g a // C is the reference base
+ * : a t - g a
+ * : a t - - a
+ * : a t CAg a
+ *
+ * There are actually four segregating alleles:
+ *
+ * { C g , - g, - -, and CAg } over bases 2-4
+ *
+ * However, the molecular equivalence explicitly listed above is usually discarded, so the actual
+ * segregating alleles are:
+ *
+ * { C g, g, -, C a g }
+ *
+ * Critically, it should be possible to apply an allele to a reference sequence to create the
+ * correct haplotype sequence:
+ *
+ * Allele + reference => haplotype
+ *
+ * For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the
+ * Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context.
+ *
+ * Given list of alleles it's possible to determine the "type" of the variation
+ *
+ * A / C @ loc => SNP with
+ * - / A => INDEL
+ *
+ * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
+ */
+public class Allele {
+ private boolean isRef = false;
+ private byte[] bases = null;
+
+ public Allele(byte[] bases, boolean isRef) {
+ bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
+ this.isRef = isRef;
+
+ if ( bases == null )
+ throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
+
+ this.bases = bases;
+ for ( byte b : bases ) {
+ if ( ! BaseUtils.isRegularBase(b) ) {
+ throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
+ }
+ }
+ }
+
+ /** null allele creation method */
+ public Allele(boolean isRef) {
+ this("", isRef);
+ }
+
+ public Allele(String bases, boolean isRef) {
+ this(bases.getBytes(), isRef);
+ }
+
+ //
+ //
+ // accessor routines
+ //
+ //
+ public boolean isNullAllele() { return length() == 0; }
+ public boolean isNonNullAllele() { return ! isNullAllele(); }
+
+ public boolean isReference() { return isRef; }
+ public boolean isNonReference() { return ! isReference(); }
+
+
+ /**
+ * Return the DNA bases segregating in this allele. Note this isn't reference polarized,
+ * so the Null allele is represented by a vector of length 0
+ *
+ * @return the segregating bases
+ */
+ public byte[] getBases() { return bases; }
+
+ /**
+ * @param other the other allele
+ *
+ * @return true if these alleles are equal
+ */
+ public boolean equals(Allele other) {
+ return Arrays.equals(bases, other.getBases());
+ }
+
+ public int length() {
+ return bases.length;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java
similarity index 82%
rename from java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
rename to java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java
index cbdb96e3a..5bfd2dbdc 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java
@@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk.refdata;
+package org.broadinstitute.sting.oneoffprojects.variantcontext;
import java.util.*;
@@ -10,11 +10,6 @@ import java.util.*;
* This class emcompasses all the basic information about a genotype
*/
public class Genotype {
-
- public enum StandardAttributes {
- DELETION_LENGTH, INVERSION_LENGTH
- }
-
private List alleles;
private double negLog10PError;
@@ -32,9 +27,9 @@ public class Genotype {
}
/**
- * @return the alleles for this genotype
- */
- public List getAlleles() { return alleles; }
+ * @return the alleles for this genotype
+ */
+ public List getAlleles() { return alleles; }
/**
* @return the ploidy of this genotype
@@ -77,10 +72,10 @@ public class Genotype {
* @return true if all alleles for this genotype are SNPs or reference
*/
public boolean isPointGenotype() {
- for ( Allele allele : alleles ) {
- if ( allele.isVariant() && !allele.isSNP() )
- return false;
- }
+// for ( Allele allele : alleles ) {
+// if ( allele.isVariant() && !allele.isSNP() )
+// return false;
+// }
return true;
}
@@ -88,21 +83,21 @@ public class Genotype {
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
- for ( Allele allele : alleles ) {
- if ( allele.isVariant() )
- return true;
- }
+// for ( Allele allele : alleles ) {
+// if ( allele.isVariant() )
+// return true;
+// }
return false;
}
/**
- * @return the -1 * log10-based error estimate
- */
+ * @return the -1 * log10-based error estimate
+ */
public double getNegLog10PError() { return negLog10PError; }
/**
- * @return the sample name
- */
+ * @return the sample name
+ */
public String getSample() { return sample; }
/**
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java
new file mode 100755
index 000000000..c9e56742a
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java
@@ -0,0 +1,440 @@
+package org.broadinstitute.sting.oneoffprojects.variantcontext;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.gatk.refdata.*;
+
+import java.util.*;
+import org.apache.commons.jexl.*;
+
+
+/**
+ * @author ebanks
+ *
+ * Class VariantContext
+ *
+ * This class represents a context that unifies one or more variants
+ */
+public class VariantContext {
+ private GenomeLoc loc;
+
+ private Set alleles = new HashSet();
+
+ private Set genotypes = new HashSet();
+
+ private HashMap attributes = new HashMap();
+
+ Type type = null;
+
+ private double negLog10PError = 0.0; // todo - fixme
+
+ /** Have we checked this VariantContext already? */
+ private boolean validatedP = false;
+
+// public VariantContext(VariationRod rod) {
+//
+// // TODO -- VariationRod should eventually require classes to implement toVariationContext()
+// // TODO -- (instead of using a temporary adapter class)
+//
+// loc = rod.getLocation();
+// reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
+//
+// // TODO -- populate the alleles and genotypes through an adapter
+// alleles = new HashSet();
+// genotypes = new HashSet();
+//
+// attributes = new HashMap();
+// }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // constructors
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ public VariantContext(GenomeLoc loc) {
+ if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); }
+
+ this.loc = loc;
+ }
+
+ protected VariantContext(VariantContext parent, Set genotypes, HashMap attributes) {
+ this(parent.getLocation(), parent.getAlleles(), genotypes, attributes);
+ }
+
+ public VariantContext(GenomeLoc loc, Set alleles, Set genotypes, HashMap attributes) {
+ this(loc);
+
+ // todo -- add extensive testing here
+
+ // todo -- check that exactly one allele is tagged as reference
+
+ this.alleles = new HashSet(alleles);
+ this.genotypes = new HashSet(genotypes);
+ this.attributes = new HashMap(attributes);
+ }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // type operations
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ /**
+ * see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3
+ *
+ * Format:
+ * dbSNP variation class
+ * Rules for assigning allele classes
+ * Sample allele definition
+ *
+ * Single Nucleotide Polymorphisms (SNPs)a
+ * Strictly defined as single base substitutions involving A, T, C, or G.
+ * A/T
+ *
+ * Deletion/Insertion Polymorphisms (DIPs)
+ * Designated using the full sequence of the insertion as one allele, and either a fully
+ * defined string for the variant allele or a Ò-Ó character to specify the deleted allele.
+ * This class will be assigned to a variation if the variation alleles are of different lengths or
+ * if one of the alleles is deleted (Ò-Ó).
+ * T/-/CCTA/G
+ *
+ * No-variation
+ * Reports may be submitted for segments of sequence that are assayed and determined to be invariant
+ * in the sample.
+ * (NoVariation)
+ *
+ * Mixed
+ * Mix of other classes
+ *
+ *
+ * Not currently supported:
+ *
+ * Heterozygous sequencea
+ * The term heterozygous is used to specify a region detected by certain methods that do not
+ * resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
+ * sequence must be provided to define a sequence context for the variation.
+ * (heterozygous)
+ *
+ * Microsatellite or short tandem repeat (STR)
+ * Alleles are designated by providing the repeat motif and the copy number for each allele.
+ * Expansion of the allele repeat motif designated in dbSNP into full-length sequence will
+ * be only an approximation of the true genomic sequence because many microsatellite markers are
+ * not fully sequenced and are resolved as size variants only.
+ * (CAC)8/9/10/11
+ *
+ * Named variant
+ * Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon
+ * dimorphism for Alu or line elements. These variations frequently include a deletion Ò-Ó indicator
+ * for the absent allele.
+ * (alu) / -
+ *
+ * Multi-Nucleotide Polymorphism (MNP)
+ * Assigned to variations that are multi-base variations of a single, common length
+ * GGA/AGT
+ */
+
+ public enum Type {
+ NO_VARIATION,
+ SNP,
+ INDEL,
+ MIXED
+ }
+
+ /**
+ * convenience method for switching over the allele type
+ *
+ * @return the AlleleType of this allele
+ **/
+ public Type getType() {
+ if ( type == null )
+ determineType();
+
+ return type;
+ }
+
+ /**
+ * convenience method for SNPs
+ *
+ * @return true if this is a SNP, false otherwise
+ */
+ public boolean isSNP() { return getType() == Type.SNP; }
+
+ /**
+ * convenience method for variants
+ *
+ * @return true if this is a variant allele, false if it's reference
+ */
+ public boolean isVariant() { return getType() != Type.NO_VARIATION; }
+
+
+ /**
+ * convenience method for indels
+ *
+ * @return true if this is an indel, false otherwise
+ */
+ public boolean isIndel() { return getType() == Type.INDEL; }
+
+
+ /**
+ * convenience method for indels
+ *
+ * @return true if this is an mixed variation, false otherwise
+ */
+ public boolean isMixed() { return getType() == Type.MIXED; }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // Generic accessors
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ /**
+ * @return the location of this context
+ */
+ public GenomeLoc getLocation() { return loc; }
+
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // Working with alleles
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ /**
+ * @return the reference allele for this context
+ */
+ public Allele getReference() {
+ for ( Allele allele : getAlleles() )
+ if ( allele.isReference() )
+ return allele;
+
+ throw new StingException("BUG: no reference allele found at " + this);
+ }
+
+ /**
+ * @return true if the context is strictly bi-allelic
+ */
+ public boolean isBiallelic() {
+ //return getAlternateAlleles().size() == 1;
+ return getAlleles().size() == 2;
+ }
+
+ /**
+ * Gets the alleles. This method should return all of the alleles present at the location,
+ * including the reference allele. There are no constraints imposed on the ordering of alleles
+ * in the set. If the reference is not an allele in this context it will not be included.
+ *
+ * @return the set of alleles
+ */
+ public Set getAlleles() { return alleles; }
+
+ /**
+ * Gets the alternate alleles. This method should return all the alleles present at the location,
+ * NOT including the reference allele. There are no constraints imposed on the ordering of alleles
+ * in the set.
+ *
+ * @return the set of alternate alleles
+ */
+ public Set getAlternateAlleles() {
+ HashSet altAlleles = new HashSet();
+ for ( Allele allele : alleles ) {
+ if ( allele.isNonReference() )
+ altAlleles.add(allele);
+ }
+
+ return altAlleles;
+ }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // Working with genotypes
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ /**
+ * @return true if the context represents variants with associated genotypes
+ */
+ public boolean hasGenotypes() { return genotypes.size() > 0; }
+
+ /**
+ * @return set of all Genotypes associated with this context
+ */
+
+ // todo -- genotypes should really be stored as map, not set
+ public Set getGenotypes() { return genotypes; }
+
+ public Map getGenotypeMap() {
+ HashMap map = new HashMap();
+ for ( Genotype g : genotypes )
+ map.put(g.getSample(), g);
+ return map;
+ }
+
+
+ /**
+ * @return the set of all sample names in this context
+ */
+ public Set getSampleNames() {
+ return getGenotypeMap().keySet();
+ }
+
+ /**
+ * @param sample the sample name
+ *
+ * @return the Genotype associated with the given sample in this context or null if the sample is not in this context
+ */
+ public Genotype getGenotype(String sample) {
+ return getGenotypeMap().get(sample);
+ }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // Working with attributes
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ // todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them
+
+ // todo -- define common attributes as enum
+
+ /**
+ * Sets the given attribute
+ *
+ * @param key the attribute key
+ * @param value the attribute value
+ */
+ public void putAttribute(Object key, Object value) {
+ attributes.put(key, value);
+ }
+
+ public void putAttributes(Map extends Object, Object> map) {
+ attributes.putAll(map);
+ }
+
+ public boolean hasAttribute(Object key) {
+ return attributes.containsKey(key);
+ }
+
+ public int getNumAttributes() {
+ return attributes.size();
+ }
+
+ /**
+ * @param key the attribute key
+ *
+ * @return the attribute value for the given key (or null if not set)
+ */
+ public Object getAttribute(Object key) {
+ return attributes.get(key);
+ }
+
+ public Object getAttribute(Object key, Object defaultValue) {
+ if ( hasAttribute(key) )
+ return attributes.get(key);
+ else
+ return defaultValue;
+ }
+
+ public String getAttributeAsString(Object key) { return (String)getAttribute(key); }
+ public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); }
+ public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); }
+
+ public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
+ public int getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
+ public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
+
+ /**
+ * @return the attribute map
+ */
+ public Map getAttributes() {
+ return attributes;
+ }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // validation
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ /**
+ * To be called by any modifying routines
+ */
+ private void invalidate() { validatedP = false; }
+
+ public boolean validate() {
+ return validate(true);
+ }
+
+ public boolean validate(boolean throwException) {
+ if ( ! validatedP ) {
+ boolean valid = false;
+ // todo -- add extensive validation checking here
+ if ( valid ) {
+ validatedP = valid;
+ } else if ( throwException ) {
+ throw new StingException(this + " failed validation");
+ }
+
+ return valid;
+ } else {
+ return validatedP;
+ }
+ }
+
+ // ---------------------------------------------------------------------------------------------------------
+ //
+ // utility routines
+ //
+ // ---------------------------------------------------------------------------------------------------------
+
+ private void determineType() {
+ if ( type == null ) {
+ // todo -- figure out the variation type
+ }
+ }
+
+ // todo -- toString() method
+
+ /**
+ * @return true if the context represents point alleles only (i.e. no indels or structural variants)
+ */
+// public boolean isPointAllele() {
+// for ( Allele allele : alleles ) {
+// if ( allele.isVariant() && !allele.isSNP() )
+// return false;
+// }
+// return true;
+// }
+//
+
+// /**
+// * @return set of all subclasses within this context
+// */
+// public Set getSubclasses() {
+// Set subclasses = new HashSet();
+// for ( Genotype g : genotypes )
+// subclasses.addAll(g.getAttributes().keySet());
+// return subclasses;
+// }
+
+ /**
+ * @param allele the allele to be queried
+ *
+ * @return the frequency of the given allele in this context
+ */
+ public double getAlleleFrequency(Allele allele) {
+ int alleleCount = 0;
+ int totalCount = 0;
+
+ for ( Genotype g : genotypes ) {
+ for ( Allele a : g.getAlleles() ) {
+ totalCount++;
+ if ( allele.equals(a) )
+ alleleCount++;
+ }
+ }
+
+ return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
+ }
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java
new file mode 100755
index 000000000..f3842377b
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java
@@ -0,0 +1,104 @@
+package org.broadinstitute.sting.oneoffprojects.variantcontext;
+
+import java.util.*;
+import org.apache.commons.jexl.*;
+
+
+public class VariantContextUtils {
+ private static final String UNIQUIFIED_SUFFIX = ".unique";
+
+ /**
+ * @param other another variant context
+ *
+ * throws an exception if there is a collision such that the same sample exists in both contexts
+ * @return a context representing the merge of this context and the other provided context
+ */
+ public VariantContext merge(VariantContext left, VariantContext other) {
+ return merge(left, other, false);
+ }
+
+ /**
+ * @param other another variant context
+ * @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
+ * the samples will be uniquified(based on their sources);
+ * otherwise, an exception will be thrown
+ *
+ * @return a context representing the merge of this context and the other provided context
+ */
+ public VariantContext merge(VariantContext left, VariantContext other, boolean uniquifySamples) {
+ // todo -- make functional
+
+ if ( !left.getLocation().equals(other.getLocation()) )
+ throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
+
+ Set samples = left.getSampleNames();
+ Set Gs = new HashSet(left.getGenotypes());
+
+ for ( Genotype g : other.getGenotypes() ) {
+ if ( samples.contains(g.getSample()) ) {
+ if ( uniquifySamples )
+ g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
+ else
+ throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
+ }
+ Gs.add(g);
+ }
+
+ HashMap attrs = new HashMap(left.getAttributes());
+ attrs.putAll(other.getAttributes());
+
+ return new VariantContext(left, Gs, attrs);
+ }
+
+
+ /**
+ * @param subclass the name of a subclass of variants to select
+ *
+ * @return a subset of this context which selects based on the given subclass
+ */
+// public VariantContextUtils select(String subclass) {
+// HashSet Gs = new HashSet();
+// for ( Genotype g : genotypes ) {
+// if ( g.getAttribute(subclass) != null )
+// Gs.add(g);
+// }
+// return createNewContext(Gs, attributes);
+// }
+
+ /**
+ * @param expr a jexl expression describing how to filter this context
+ *
+ * @return a subset of this context which is filtered based on the given expression
+ */
+// public VariantContextUtils filter(String expr) {
+// HashSet Gs = new HashSet();
+// try {
+// Expression filterExpression = ExpressionFactory.createExpression(expr);
+//
+// for ( Genotype g : genotypes ) {
+// JexlContext jContext = JexlHelper.createContext();
+// jContext.setVars(g.getAttributes());
+// if ( (Boolean)filterExpression.evaluate(jContext) )
+// Gs.add(g);
+// }
+//
+// } catch (Exception e) {
+// throw new StingException("JEXL error in VariantContext: " + e.getMessage());
+// }
+//
+// return createNewContext(Gs, attributes);
+// }
+
+ /**
+ * @return a set of new variant contexts, one for each sample from this context
+ */
+// public Set splitBySample() {
+// Set contexts = new HashSet();
+// for ( Genotype g : genotypes ) {
+// HashSet gAsSet = new HashSet();
+// gAsSet.add(g);
+// contexts.add(createNewContext(gAsSet, attributes));
+// }
+// return contexts;
+// }
+}
\ No newline at end of file
diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java
index 6b85a228c..509abfbbc 100755
--- a/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/refdata/PlinkRodTest.java
@@ -1,243 +1,243 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.Assert;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.util.*;
-
-/**
- * Created by IntelliJ IDEA.
- * User: Ghost
- * Date: Jan 22, 2010
- * Time: 11:27:33 PM
- * To change this template use File | Settings | File Templates.
- */
-public class PlinkRodTest extends BaseTest {
- private static IndexedFastaSequenceFile seq;
-
- @BeforeClass
- public static void beforeTests() {
- try {
- seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
- } catch (FileNotFoundException e) {
- throw new StingException("unable to load the sequence dictionary");
- }
- GenomeLocParser.setupRefContigOrdering(seq);
-
- }
-
- public BufferedReader openFile(String filename) {
- try {
- return new BufferedReader(new FileReader(filename));
- } catch (FileNotFoundException e) {
- throw new StingException("Couldn't open file " + filename);
- }
-
- }
-
- @Test
- public void testStandardPedFile() {
- PlinkRod rod = new PlinkRod("test");
- try {
- rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
- } catch ( FileNotFoundException e ) {
- throw new StingException("test file for testStandardPedFile() does not exist",e);
- }
-
- // test that the sample names are correct
-
- List rodNames = rod.getVariantSampleNames();
- List expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
-
- Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
-
- boolean namesCorrect = true;
- for ( int i = 0; i < expectedNames.size(); i++ ) {
- namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
- }
-
- Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
-
- // test that rod can be iterated over
-
- ArrayList> genotypesInRod = new ArrayList>();
- ArrayList> sampleNamesInRod = new ArrayList>();
- ArrayList lociInRod = new ArrayList();
- do {
- genotypesInRod.add(rod.getGenotypes());
- sampleNamesInRod.add(rod.getVariantSampleNames());
- lociInRod.add(rod.getLocation());
- } while ( rod.parseLine(null,null) );
-
- Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
-
- ArrayList snp1 = genotypesInRod.get(0);
- ArrayList snp3 = genotypesInRod.get(2);
-
- Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
- Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
- Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
-
-
- List snp1_individual3_alleles = snp1.get(2).getAlleles();
- List snp3_individual2_alleles = snp3.get(1).getAlleles();
-
- String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
- String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
-
- Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
- Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
-
- boolean name2isSame = true;
-
- for ( ArrayList names : sampleNamesInRod ) {
- name2isSame = name2isSame && names.get(1).equals("NAMELY");
- }
-
- Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
-
- // test that the loci are correctly parsed and in order
-
- List expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
- boolean lociCorrect = true;
- for ( int i = 0; i < 3; i ++ ) {
- lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
- }
- }
-
- @Test
- public void testStandardPedFileWithIndels() {
- PlinkRod rod = new PlinkRod("test");
- try {
- rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
- } catch ( FileNotFoundException e) {
- throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
- }
-
- // Iterate through the rod
-
- List> genotypesInRod = new ArrayList>();
- ArrayList> sampleNamesInRod = new ArrayList>();
- ArrayList lociInRod = new ArrayList();
- ArrayList snpSites = new ArrayList();
- do {
- genotypesInRod.add(rod.getGenotypes());
- sampleNamesInRod.add(rod.getVariantSampleNames());
- lociInRod.add(rod.getLocation());
- snpSites.add(rod.variantIsSNP());
- } while ( rod.parseLine(null,null) );
-
- boolean snpOrder = true;
- List expectedOrder = Arrays.asList(true,false,true,false);
- for ( int i = 0; i < 4; i ++ ) {
- snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
- }
-
- Assert.assertTrue("That the variant type order is as expected", snpOrder);
- Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
- Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
- Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
- Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
- Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
- Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
- Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
- }
-
- @Test
- public void testBinaryPedFileNoIndels() {
- PlinkRod rod = new PlinkRod("binaryTest1");
- try {
- rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
- } catch (FileNotFoundException e) {
- throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
- }
-
- // iterate through the ROD and get stuff
- ArrayList lociInRod = new ArrayList();
- ArrayList> genotypesInRod = new ArrayList>();
- ArrayList> samplesInRod = new ArrayList>();
-
- do {
- lociInRod.add(rod.getLocation());
- genotypesInRod.add(rod.getGenotypes());
- samplesInRod.add(rod.getVariantSampleNames());
- } while ( rod.parseLine(null,null) );
-
- List expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
-
- for ( int i = 0; i < expecLoc.size(); i ++ ) {
- Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
- }
-
- List expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
- "GG","GG","AG","TT","CC","CT","TG","GG","GG");
- List expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
- false,false,false,true,false,false,true,true,false,false);
- List expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
- "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
- "NA12878","NA12890","NA07000");
- int snpNo = 1;
- int indiv = 1;
- int alleleOffset = 0;
- for ( ArrayList snp : genotypesInRod ) {
- for ( Genotype gen : snp ) {
- String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
- Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
- "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
- Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
- Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
- indiv++;
- alleleOffset++;
- }
- indiv = 1;
- snpNo++;
- }
- }
-
- @Test
- public void testIndelBinary() {
- PlinkRod rod = new PlinkRod("binaryTest2");
- try {
- rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
- } catch (FileNotFoundException e) {
- throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
- }
-
- ArrayList> genotypesInRod = new ArrayList>();
- do {
- genotypesInRod.add(rod.getGenotypes());
- } while ( rod.parseLine(null,null) );
-
- List expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
- "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
- List expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
- false,false,false,false,false,false,true,false,true,false,false,false);
- List expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
- false,false,false,false,false,false,false,false,false,false,false,false);
-
- int al = 0;
- for ( ArrayList snp : genotypesInRod ) {
- for ( Genotype gen : snp ) {
- String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
- Allele firstAl = gen.getAlleles().get(0);
- Allele secondAl = gen.getAlleles().get(1);
- boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
- boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
- Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
- Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
- Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
- al++;
- }
- }
- }
-}
+//package org.broadinstitute.sting.gatk.refdata;
+//
+//import org.broadinstitute.sting.BaseTest;
+//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
+//import org.broadinstitute.sting.utils.StingException;
+//import org.broadinstitute.sting.utils.GenomeLocParser;
+//import org.broadinstitute.sting.utils.GenomeLoc;
+//import org.junit.BeforeClass;
+//import org.junit.Test;
+//import org.junit.Assert;
+//
+//import java.io.File;
+//import java.io.FileNotFoundException;
+//import java.io.BufferedReader;
+//import java.io.FileReader;
+//import java.util.*;
+//
+///**
+// * Created by IntelliJ IDEA.
+// * User: Ghost
+// * Date: Jan 22, 2010
+// * Time: 11:27:33 PM
+// * To change this template use File | Settings | File Templates.
+// */
+//public class PlinkRodTest extends BaseTest {
+// private static IndexedFastaSequenceFile seq;
+//
+// @BeforeClass
+// public static void beforeTests() {
+// try {
+// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
+// } catch (FileNotFoundException e) {
+// throw new StingException("unable to load the sequence dictionary");
+// }
+// GenomeLocParser.setupRefContigOrdering(seq);
+//
+// }
+//
+// public BufferedReader openFile(String filename) {
+// try {
+// return new BufferedReader(new FileReader(filename));
+// } catch (FileNotFoundException e) {
+// throw new StingException("Couldn't open file " + filename);
+// }
+//
+// }
+//
+// @Test
+// public void testStandardPedFile() {
+// PlinkRod rod = new PlinkRod("test");
+// try {
+// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
+// } catch ( FileNotFoundException e ) {
+// throw new StingException("test file for testStandardPedFile() does not exist",e);
+// }
+//
+// // test that the sample names are correct
+//
+// List rodNames = rod.getVariantSampleNames();
+// List expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
+//
+// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
+//
+// boolean namesCorrect = true;
+// for ( int i = 0; i < expectedNames.size(); i++ ) {
+// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
+// }
+//
+// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
+//
+// // test that rod can be iterated over
+//
+// ArrayList> genotypesInRod = new ArrayList>();
+// ArrayList> sampleNamesInRod = new ArrayList>();
+// ArrayList lociInRod = new ArrayList();
+// do {
+// genotypesInRod.add(rod.getGenotypes());
+// sampleNamesInRod.add(rod.getVariantSampleNames());
+// lociInRod.add(rod.getLocation());
+// } while ( rod.parseLine(null,null) );
+//
+// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
+//
+// ArrayList snp1 = genotypesInRod.get(0);
+// ArrayList snp3 = genotypesInRod.get(2);
+//
+// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
+// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
+// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
+//
+//
+// List snp1_individual3_alleles = snp1.get(2).getAlleles();
+// List snp3_individual2_alleles = snp3.get(1).getAlleles();
+//
+// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
+// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
+//
+// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
+// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
+//
+// boolean name2isSame = true;
+//
+// for ( ArrayList names : sampleNamesInRod ) {
+// name2isSame = name2isSame && names.get(1).equals("NAMELY");
+// }
+//
+// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
+//
+// // test that the loci are correctly parsed and in order
+//
+// List expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
+// boolean lociCorrect = true;
+// for ( int i = 0; i < 3; i ++ ) {
+// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
+// }
+// }
+//
+// @Test
+// public void testStandardPedFileWithIndels() {
+// PlinkRod rod = new PlinkRod("test");
+// try {
+// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
+// } catch ( FileNotFoundException e) {
+// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
+// }
+//
+// // Iterate through the rod
+//
+// List> genotypesInRod = new ArrayList>();
+// ArrayList> sampleNamesInRod = new ArrayList>();
+// ArrayList lociInRod = new ArrayList();
+// ArrayList snpSites = new ArrayList();
+// do {
+// genotypesInRod.add(rod.getGenotypes());
+// sampleNamesInRod.add(rod.getVariantSampleNames());
+// lociInRod.add(rod.getLocation());
+// snpSites.add(rod.variantIsSNP());
+// } while ( rod.parseLine(null,null) );
+//
+// boolean snpOrder = true;
+// List expectedOrder = Arrays.asList(true,false,true,false);
+// for ( int i = 0; i < 4; i ++ ) {
+// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
+// }
+//
+// Assert.assertTrue("That the variant type order is as expected", snpOrder);
+// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
+// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
+// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
+// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
+// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
+// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
+// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
+// }
+//
+// @Test
+// public void testBinaryPedFileNoIndels() {
+// PlinkRod rod = new PlinkRod("binaryTest1");
+// try {
+// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
+// } catch (FileNotFoundException e) {
+// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
+// }
+//
+// // iterate through the ROD and get stuff
+// ArrayList lociInRod = new ArrayList();
+// ArrayList> genotypesInRod = new ArrayList>();
+// ArrayList> samplesInRod = new ArrayList>();
+//
+// do {
+// lociInRod.add(rod.getLocation());
+// genotypesInRod.add(rod.getGenotypes());
+// samplesInRod.add(rod.getVariantSampleNames());
+// } while ( rod.parseLine(null,null) );
+//
+// List expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
+//
+// for ( int i = 0; i < expecLoc.size(); i ++ ) {
+// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
+// }
+//
+// List expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
+// "GG","GG","AG","TT","CC","CT","TG","GG","GG");
+// List expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
+// false,false,false,true,false,false,true,true,false,false);
+// List expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
+// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
+// "NA12878","NA12890","NA07000");
+// int snpNo = 1;
+// int indiv = 1;
+// int alleleOffset = 0;
+// for ( ArrayList