diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java index 25c4c177c..84ef7f2fa 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java @@ -1,105 +1,124 @@ package org.broadinstitute.sting.gatk.refdata; -/* -import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; -import org.broadinstitute.sting.utils.BaseUtils; + +import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.CommandLineGATK; import java.io.*; import java.util.*; -import net.sf.samtools.SAMFileHeader; - /** * Created by IntelliJ IDEA. - * User: chartl - * Date: Jan 19, 2010 - * Time: 10:24:18 AM - * To change this template use File | Settings | File Templates. + * User: chartl, ebanks * -public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum { + */ +public class PlinkRod extends BasicReferenceOrderedDatum implements Iterator { + + public static final String SEQUENOM_NO_CALL = "0"; + public static final String SEQUENOM_NO_VARIANT = "-"; + private final Set headerEntries = new HashSet(Arrays.asList("#Family ID","Individual ID","Sex", "Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE")); private final byte SNP_MAJOR_MODE = 1; - private ArrayList variants; private PlinkVariantInfo currentVariant; private ListIterator variantIterator; private PlinkFileType plinkFileType; public enum PlinkFileType { - STANDARD_PED,RAW_PED,BINARY_PED + STANDARD_PED, RAW_PED, BINARY_PED } -// CONSTRUCTOR - public PlinkRod(String name) { super(name); } + public PlinkRod(String name, PlinkVariantInfo record, ListIterator iter) { + super(name); + currentVariant = record; + variantIterator = iter; + } + @Override public Object initialize(final File plinkFile) throws FileNotFoundException { if ( ! plinkFile.exists() ) { throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist."); } - variants = parsePlinkFile(plinkFile); - if ( variants != null ) { - variantIterator = variants.listIterator(); - currentVariant = variantIterator.next(); - } - - assertNotNull(); + ArrayList variants = parsePlinkFile(plinkFile); + if ( variants == null ) + throw new IllegalStateException("Error parsing Plink file"); + variantIterator = variants.listIterator(); return null; } + public static PlinkRod createIterator(String name, File file) { + PlinkRod plink = new PlinkRod(name); + try { + plink.initialize(file); + } catch (FileNotFoundException e) { + throw new StingException("Unable to find file " + file); + } + return plink; + } + private void assertNotNull() { if ( currentVariant == null ) { - throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" ); + throw new IllegalStateException("Current variant information is null"); } } + public boolean hasNext() { + return variantIterator.hasNext(); + } + + public PlinkRod next() { + if ( !this.hasNext() ) + throw new NoSuchElementException("PlinkRod next called on iterator with no more elements"); + + // get the next record + currentVariant = variantIterator.next(); + return new PlinkRod(name, currentVariant, variantIterator); + } + @Override public boolean parseLine(Object obj, String[] args) { - if ( variantIterator.hasNext() ) { - currentVariant = variantIterator.next(); - return true; - } else { - return false; - } + throw new UnsupportedOperationException("PlinkRod does not support the parseLine method"); + } + + public void remove() { + throw new UnsupportedOperationException("The remove operation is not supported for a PlinkRod"); } @Override public GenomeLoc getLocation() { + assertNotNull(); return currentVariant.getLocation(); } @Override public String toString() { - return currentVariant == null ? "" : currentVariant.toString(); + assertNotNull(); + return currentVariant.toString(); } public String getVariantName() { + assertNotNull(); return currentVariant.getName(); + } - public ArrayList getVariantSampleNames() { - return currentVariant.getSampleNames(); + public VariantContext getVariantContext() { + assertNotNull(); + return currentVariant.getVariantContext(); } - public ArrayList getGenotypes() { - return currentVariant.getGenotypes(); - } - - public boolean variantIsSNP() { - return currentVariant.isSNP(); + public boolean isIndel() { + assertNotNull(); + return currentVariant.isIndel(); } @@ -123,7 +142,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd /* *** *** *** *** *** ** *** ** *** ** *** ** *** ** *** * * PARSING STANDARD TEXT PED FILES * ** * *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/ -/* + private ArrayList parseTextFormattedPlinkFile( File file ) { try { BufferedReader reader = new BufferedReader( new FileReader ( file ) ); @@ -131,7 +150,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd ArrayList seqVars = instantiateVariantListFromHeader(header); ArrayList snpOffsets = getSNPOffsetsFromHeader(header); - String line = null; + String line; do { line = reader.readLine(); incorporateInfo(seqVars,snpOffsets,line); @@ -156,40 +175,29 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd } String[] plinkInfo; - if ( plinkFileType == PlinkFileType.STANDARD_PED) { - plinkInfo = plinkLine.split("\t"); - } else { + if ( plinkFileType != PlinkFileType.STANDARD_PED ) throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); - } + plinkInfo = plinkLine.split("\t"); String individualName = plinkInfo[1]; int snpNumber = 0; - - if ( plinkFileType == PlinkFileType.STANDARD_PED ) { - for ( int i : offsets ) { - vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName); - snpNumber++; - } + for ( int i : offsets ) { + vars.get(snpNumber).addGenotypeEntry(plinkInfo[i].split("\\s+"), individualName); + snpNumber++; } } private ArrayList instantiateVariantListFromHeader(String header) { - if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing - plinkFileType = PlinkFileType.STANDARD_PED; - } else {// first line is the raw header; comes from de-binary-ing a .bed file - plinkFileType = PlinkFileType.RAW_PED; + // if the first line is not a comment (what we're used to seeing), + // then it's the raw header (comes from de-binary-ing a .bed file) + if ( !header.startsWith("#") ) throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); - } + + plinkFileType = PlinkFileType.STANDARD_PED; ArrayList seqVars = new ArrayList(); - String[] headerFields; - - if ( plinkFileType == PlinkFileType.STANDARD_PED ) { - headerFields = header.split("\t"); - } else { - throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file."); - } + String[] headerFields = header.split("\t"); for ( String field : headerFields ) { if ( ! headerEntries.contains(field) ) { @@ -225,7 +233,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd /* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** * * PARSING BINARY PLINK BED/BIM/FAM FILES * * * *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/ - /* + private ArrayList parseBinaryFormattedPlinkFile(File file) { PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file); ArrayList parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile); @@ -269,17 +277,16 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd ArrayList variants = new ArrayList(); try { - String line = null; - do { + String line = reader.readLine(); + while ( line != null ) { + String[] snpInfo = line.split("\\s+"); + PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1]); + variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3]))); + variant.setAlleles(snpInfo[4],snpInfo[5]); + variants.add(variant); + line = reader.readLine(); - if ( line != null ) { - String[] snpInfo = line.split("\\s+"); - PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true); - variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3]))); - variant.setAlleles(snpInfo[4],snpInfo[5]); - variants.add(variant); - } - } while ( line != null ); + } } catch ( IOException e ) { throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e); } @@ -300,7 +307,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd ArrayList sampleNames = new ArrayList(); try { - String line = null; + String line; do { line = reader.readLine(); if ( line != null ) { @@ -325,7 +332,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd } try { - byte genotype = -1; + byte genotype; long bytesRead = 0; int snpOffset = 0; int sampleOffset = 0; @@ -397,25 +404,26 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd class PlinkVariantInfo implements Comparable { - private enum AlleleType { - INSERTION,DELETION - } - private String variantName; private GenomeLoc loc; - private ArrayList genotypes = new ArrayList(); - private ArrayList sampleNames = new ArrayList(); + private HashSet alleles = new HashSet(); + private HashSet genotypes = new HashSet(); - private ArrayList indelHolder; - private ArrayList sampleHolder; - private int siteIndelLength; - private AlleleType indelType; + // for indels + private boolean isIndel = false; + private boolean isInsertion = false; + private int indelLength = 0; // for binary parsing - private String locAllele1; private String locAllele2; + + public PlinkVariantInfo(String variantName) { + this.variantName = variantName; + parseName(); + } + public GenomeLoc getLocation() { return loc; } @@ -424,79 +432,82 @@ class PlinkVariantInfo implements Comparable { return variantName; } - public ArrayList getSampleNames() { - return sampleNames; + public VariantContext getVariantContext() { + try { + return new VariantContext(variantName, loc, alleles, genotypes); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file"); + } } - public ArrayList getGenotypes() { - return genotypes; - } - - public boolean isSNP() { - return this.indelType == null; + public boolean isIndel() { + return isIndel; } public void setGenomeLoc(GenomeLoc loc) { this.loc = loc; } + private void parseName() { + int chromIdx = variantName.indexOf("|c"); + if ( chromIdx == -1 ) + throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c...)"); + String[] pieces = variantName.substring(chromIdx+2).split("_"); + if ( pieces.length < 2 ) + throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)"); + + String chrom = pieces[0]; + if ( pieces[1].charAt(0) != 'p' ) + throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)"); + + String pos = pieces[1].substring(1); + loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos); + + if ( pieces.length > 2 && (pieces[2].equals("gI") || pieces[2].equals("gD")) ) { + // it's an indel + isIndel = true; + isInsertion = pieces[2].equals("gI"); + } + } + public void setAlleles(String al1, String al2) { - if ( al1.equals("0") ) { + if ( al1.equals(PlinkRod.SEQUENOM_NO_CALL) ) { // encoding for a site at which no variants were detected locAllele1 = al2; } else { locAllele1 = al1; } locAllele2 = al2; - if ( ! isSNP() ) { - siteIndelLength = Math.max(locAllele1.length(),locAllele2.length()); + } + + public void addGenotypeEntry(String[] alleleStrings, String sampleName) { + + ArrayList myAlleles = new ArrayList(2); + + for ( String alleleString : alleleStrings ) { + if ( alleleString.equals(PlinkRod.SEQUENOM_NO_CALL) ) { + myAlleles.add(Allele.NO_CALL); + } else { + Allele allele; + if ( !isIndel ) { + allele = new Allele(alleleString); + } else if ( alleleString.equals(PlinkRod.SEQUENOM_NO_VARIANT) ) { + allele = new Allele(alleleString, isInsertion); + } else { + allele = new Allele(alleleString, !isInsertion); + if ( indelLength == 0 ) { + indelLength = alleleString.length(); + loc = GenomeLocParser.parseGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+indelLength-1); + } + } + + myAlleles.add(allele); + if ( alleles.size() < 2 ) + alleles.add(allele); + } } - } - - // CONSTRUCTOR - - public PlinkVariantInfo(String variantName) { - this.variantName = variantName; - parseName(); - } - - public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) { - this.variantName = variantName; - if ( onlyLookForIndelInfo ) { - parseNameForIndels(); - } else { - parseName(); - } - } - - private void parseName() { - String chrom = this.variantName.split("\\|c")[1].split("_")[0]; - String pos = this.variantName.split("_p")[1].split("_")[0]; - this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos); - this.parseNameForIndels(); - } - - private void parseNameForIndels() { - if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) { - this.instantiateIndel(); - } - } - - private void instantiateIndel() { - this.indelHolder = new ArrayList(); - this.sampleHolder = new ArrayList(); - this.siteIndelLength = -1; - this.indelType = this.variantName.contains("_gI") ? AlleleType.INSERTION : AlleleType.DELETION; - } - - public void addGenotypeEntry(String genotypeString, String sampleName) { - // identify if we're dealing with a deletion - if ( this.isSNP() ) { - this.addSNP(genotypeString.split("\\s+"),sampleName); - } else { - this.addIndel(genotypeString.split("\\s+"),sampleName); - } + genotypes.add(new Genotype(sampleName, myAlleles, 20.0)); } public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) { @@ -515,139 +526,7 @@ class PlinkVariantInfo implements Comparable { alleleStr[1] = "0"; } - if ( this.isSNP() ) { - this.addSNP(alleleStr,sampleName); - } else { - this.addIndel(alleleStr,sampleName); - } - } - - private void addSNP(String[] alleleStrings, String sampleName) { - ArrayList alleles = new ArrayList(2); - - for ( String alStr : alleleStrings ) { - alleles.add(new Allele(alStr.getBytes())); - } - - genotypes.add(new Genotype(alleles,sampleName,20.0) ); - sampleNames.add(sampleName); - } - - private void addIndel(String[] alleleStrings, String sampleName) { - String alleleStr1 = alleleStrings[0]; - String alleleStr2 = alleleStrings[1]; - if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) { - // heterozygous indel - if ( alleleStr1.contains("-") ) { - this.addHetIndel(alleleStr2,sampleName) ; - } else { - this.addHetIndel(alleleStr1,sampleName); - } - } else { - this.addHomIndel(alleleStr1, alleleStr2, sampleName); - } - } - - private void addHetIndel(String baseStr, String sampleName) { - Allele ref; - Allele alt; - - if ( indelType == AlleleType.INSERTION ) { - ref = new Allele("-",true); - alt = new Allele(baseStr.getBytes(),false); - } else { - alt = new Allele("-",false); - ref = new Allele(baseStr.getBytes(),true); - } - - // this.setIndelLength(alt,baseStr.length()); - - if ( ! indelHolder.isEmpty() ) { - siteIndelLength = baseStr.length(); - this.addHeldIndels(); - } - - Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0); - // this.setIndelGenotypeLength(indel,siteIndelLength); - this.genotypes.add(indel); - this.sampleNames.add(sampleName); - } - - private void addHomIndel(String strand1, String strand2, String sampleName) { - Allele allele1; - Allele allele2; - boolean reference; - if ( indelType == AlleleType.DELETION ) { - if ( strand1.contains("-") ) { - // homozygous deletion - allele1 = new Allele("-",false); - allele2 = new Allele("-",false); - reference = false; - } else { // homozygous reference at a deletion variant site - allele1 = new Allele(strand1.getBytes(),true); - allele2 = new Allele(strand2.getBytes(),true); - reference = true; - } - } else { - if ( strand1.contains("-") ) { - // homozygous reference - allele1 = new Allele("-",true); - allele2 = new Allele("-",true); - reference = true; - } else { - allele1 = new Allele(strand1.getBytes(),false); - allele2 = new Allele(strand2.getBytes(),false); - reference = false; - } - } - - if ( reference ) { - if ( ! indelHolder.isEmpty() ) { - siteIndelLength = strand1.length(); - this.addHeldIndels(); - } - } - - if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site - Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0); - // setIndelGenotypeLength(gen,siteIndelLength); - this.genotypes.add(gen); - this.sampleNames.add(sampleName); - } else { // hold on the variants until we *do* know the in/del length at this site - this.indelHolder.add(allele1); - this.indelHolder.add(allele2); - this.sampleHolder.add(sampleName); - } - - } - -/* private void setIndelGenotypeLength(Genotype g, int length) { - g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length); - } - - private void addHeldIndels() { - Allele del1; - Allele del2; - int startingSize = indelHolder.size(); - for ( int i = 0; i < startingSize ; i+=2 ) { - del1 = indelHolder.get(i); - del2 = indelHolder.get(i+1); - this.addIndelFromCache(del1,del2,sampleHolder.get(i/2)); - if ( indelHolder.size() != startingSize ) { - throw new StingException("Halting algorithm -- possible infinite loop"); - } - } - indelHolder.clear(); - sampleHolder.clear(); - } - - private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) { - this.setIndelLength(indel1,siteIndelLength); - this.setIndelLength(indel2,siteIndelLength); - Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0); - // this.setIndelGenotypeLength(indel,siteIndelLength); - this.genotypes.add(indel); - this.sampleNames.add(sampleName); + addGenotypeEntry(alleleStr, sampleName); } public int compareTo(Object obj) { @@ -657,22 +536,14 @@ class PlinkVariantInfo implements Comparable { return loc.compareTo(((PlinkVariantInfo) obj).getLocation()); } - - private void setIndelLength(Allele al, int length) { - // Todo -- once alleles support deletion lengths add that information - // Todo -- into the object; for now this can just return - return; - } } class PlinkBinaryTrifecta { - public PlinkBinaryTrifecta() { - - } + public PlinkBinaryTrifecta() {} public File bedFile; public File bimFile; public File famFile; -} */ \ No newline at end of file +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index 19b66548a..82022b885 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -111,6 +111,7 @@ public class ReferenceOrderedData implements addModule("PicardDbSNP", rodPicardDbSNP.class); addModule("HapmapVCF", HapmapVCFROD.class); addModule("Beagle", BeagleROD.class); + addModule("Plink", PlinkRod.class); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java index 9fa1a2bf7..f648086dc 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java @@ -366,7 +366,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation */ @Override public Genotype getCalledGenotype() { - return new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb); + return new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb); } /** @@ -377,7 +377,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation @Override public List getGenotypes() { List ret = new ArrayList(); - ret.add(new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb)); + ret.add(new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb)); return ret; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java index fa53b011e..5a313e5d1 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGenotypeChipAsGFF.java @@ -275,7 +275,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements */ @Override public Genotype getCalledGenotype() { - return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence()); + return new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence()); } /** @@ -286,7 +286,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements @Override public List getGenotypes() { List ret = new ArrayList(); - ret.add(new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence())); + ret.add(new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence())); return ret; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index c603b3bb2..333608e2d 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -196,14 +196,7 @@ public class VariantContextAdaptors { public static VCFRecord toVCF(VariantContext vc) { // deal with the reference - char referenceBase = 'N'; // by default we'll use N - if ( vc.getReference().length() == 1 ) { - referenceBase = (char)vc.getReference().getBases()[0]; - } - - if ( ! vc.isSNP() ) - // todo -- update the code so it works correctly with indels - throw new StingException("VariantContext -> VCF converter currently doesn't support indels; complain to the GATK team"); + String referenceBase = new String(vc.getReference().getBases()); String contig = vc.getLocation().getContig(); long position = vc.getLocation().getStart(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 45f234524..87d42da5b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -95,7 +95,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul for ( String sample : GLs.keySet() ) { // create the call - GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); + GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, Character.toString(ref), loc); // set the genotype and confidence Pair AFbasedGenotype = matrix.getGenotype(frequency, sample); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 02e6f22f1..c79d785b9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -347,7 +347,7 @@ public class VariantEvalWalker extends RodWalker { private RodVCF fakeVCFForSample(RodVCF eval, ReferenceContext ref, final String sampleName) { VCFGenotypeRecord genotype = (VCFGenotypeRecord)eval.getGenotype(sampleName); if ( genotype.getNegLog10PError() > 0 ) { - VCFRecord record = new VCFRecord(ref.getBase(), ref.getLocus(), "GT"); + VCFRecord record = new VCFRecord(Character.toString(ref.getBase()), ref.getLocus(), "GT"); record.setAlternateBases(eval.getRecord().getAlternateAlleles()); record.addGenotypeRecord(genotype); record.setQual(10*genotype.getNegLog10PError()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index 7b27e6a2a..2f8909817 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -83,9 +83,9 @@ public class DeNovoSNPWalker extends RefWalker{ Genotype parent1call = parent1.genotypes.get(0); Genotype parent2call = parent2.genotypes.get(0); - if (!parent1call.isVariant(parent1call.getReference()) && + if (!parent1call.isVariant(parent1call.getReference().charAt(0)) && parent1call.getNegLog10PError() > 5 && - !parent2call.isVariant(parent2call.getReference()) && + !parent2call.isVariant(parent2call.getReference().charAt(0)) && parent2call.getNegLog10PError() > 5) { double sumConfidences = 0.5 * (0.5 * child.getNegLog10PError() + diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java index 2afe1907a..88c31cd2e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java @@ -81,7 +81,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker, Strin VariantCallContext calls = UG.map(tracker, ref, subContext); if (calls != null && calls.genotypes != null && calls.genotypes.size() > 0) { Genotype call = calls.genotypes.get(0); - String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; + String callType = (call.isVariant(call.getReference().charAt(0))) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } } @@ -113,7 +113,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker, Strin double nextVrsBest = 0; double nextVrsRef = 0; - char ref = locus.getReference(); + char ref = locus.getReference().charAt(0); if (locus instanceof ReadBacked) { readDepth = ((ReadBacked)locus).getReadCount(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/HapMap2VCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/HapMap2VCF.java index 90bcdae92..90753ca7a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/HapMap2VCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/HapMap2VCF.java @@ -63,7 +63,7 @@ public class HapMap2VCF extends RodWalker { VCFGenotypeEncoding refAllele = new VCFGenotypeEncoding(ref_allele.toString()); // Create a new record - VCFRecord record = new VCFRecord(ref_allele, context.getLocation(), "GT"); + VCFRecord record = new VCFRecord(Character.toString(ref_allele), context.getLocation(), "GT"); // Record each sample's genotype info String[] hapmap_rod_genotypes = hapmap_rod.getGenotypes(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/PlinkToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/PlinkToVCF.java index 384acc284..4ec5104f3 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/PlinkToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/PlinkToVCF.java @@ -2,14 +2,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.walkers.annotator.HardyWeinberg; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RefWalker; +import org.broadinstitute.sting.gatk.refdata.PlinkRod; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.BufferedReader; @@ -21,9 +22,7 @@ import java.util.*; /** * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes) */ -public class PlinkToVCF extends RefWalker { - @Argument(fullName="sequenomePedFile", shortName="sPed", doc="The sequenome file from which to generate a VCF", required=true) - public File seqFile = null; +public class PlinkToVCF extends RodWalker { @Argument(fullName="outputVCF", shortName="vcf", doc="The VCF file to write to", required=true) public File vcfFile = null; @Argument(fullName="numberOfSamples", shortName="ns", doc="The number of samples sequenced", required=false) @@ -44,7 +43,6 @@ public class PlinkToVCF extends RefWalker { "FID","IID","PAT","MAT","SEX","PHENOTYPE")); private final int INIT_NUMBER_OF_POPULATIONS = 10; private final int DEFAULT_QUALITY = 20; - private HashMap sequenomResults = new HashMap(); private ArrayList sampleNames = new ArrayList(nSamples); private VCFGenotypeWriterAdapter vcfWriter; private final HardyWeinberg HWCalc = new HardyWeinberg(); @@ -52,17 +50,15 @@ public class PlinkToVCF extends RefWalker { private HashMap samplesToPopulation; public void initialize() { - //System.out.println("Initializing... parse sequenom file"); - parseSequenomFile(seqFile); vcfWriter = new VCFGenotypeWriterAdapter(vcfFile); if ( useSmartHardy ) { samplesToPopulation = parsePopulationFile(popFile); } Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("source", "Sequenom2VCF")); + hInfo.add(new VCFHeaderLine("source", "PlinkToVCF")); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - vcfWriter.writeHeader(new TreeSet(sampleNames),hInfo); + vcfWriter.writeHeader(new TreeSet(sampleNames), hInfo); nSamples = sampleNames.size(); } @@ -72,21 +68,32 @@ public class PlinkToVCF extends RefWalker { } public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( sequenomResults.containsKey(context.getLocation().toString()) ) { - SequenomVariantInfo varInfo = sequenomResults.remove(context.getLocation().toString()); - return addVariantInformationToCall(ref,varInfo); - } else { + if ( tracker == null ) return null; + + // get the Plink rod at this locus if there is one + PlinkRod plinkRod = null; + Iterator rods = tracker.getAllRods().iterator(); + while (rods.hasNext()) { + ReferenceOrderedDatum rod = rods.next(); + if ( rod instanceof PlinkRod ) { + plinkRod = (PlinkRod)rod; + break; + } } + + if ( plinkRod == null ) + return null; + + return addVariantInformationToCall(ref, plinkRod); } public Integer reduce(VCFRecord call, Integer numVariants) { - if ( call == null ) { - return numVariants; - } else { + if ( call != null ) { + numVariants++; printToVCF(call); - return 1 + numVariants; } + return numVariants; } public void onTraversalDone(Integer finalReduce) { @@ -106,51 +113,25 @@ public class PlinkToVCF extends RefWalker { } } - private VCFRecord addVariantInformationToCall(ReferenceContext ref, SequenomVariantInfo varInfo) { - int numNoCalls = 0; - int numHomNonrefCalls = 0; - int numNonrefAlleles = 0; + private VCFRecord addVariantInformationToCall(ReferenceContext ref, PlinkRod plinkRod) { - int sampleNumber = 0; - VCFRecord record = new VCFRecord(ref.getBase(),ref.getLocus(),"GT"); + VariantContext vContext = plinkRod.getVariantContext(); + VCFRecord record = VariantContextAdaptors.toVCF(vContext); + record.setGenotypeFormatString("GT"); - for ( String genTypeStr : varInfo.getGenotypes() ) { - if ( genTypeStr.indexOf("0") == -1 ) { - VCFGenotypeEncoding allele1 = new VCFGenotypeEncoding(genTypeStr.substring(0,1)); - VCFGenotypeEncoding allele2 = new VCFGenotypeEncoding(genTypeStr.substring(1)); - List alleles = new ArrayList(2); - alleles.add(allele1); - alleles.add(allele2); + int numNoCalls = vContext.getNoCallCount(); + int numHomVarCalls = vContext.getHomVarCount(); + int numHetCalls = vContext.getHetCount(); - VCFGenotypeRecord genotype = new VCFGenotypeRecord(sampleNames.get(sampleNumber), alleles, VCFGenotypeRecord.PHASE.UNPHASED); - genotype.setField("GQ",String.format("%d",DEFAULT_QUALITY)); - if ( genotype.isVariant(ref.getBase()) ) { - if ( genotype.isHom() ) { - numHomNonrefCalls++; - numNonrefAlleles+=2; - record.addAlternateBase(allele1); - } else { - numNonrefAlleles++; - record.addAlternateBase(allele1.getBases().equalsIgnoreCase(String.format("%c",ref.getBase())) ? allele2 : allele1); - } - } - - record.addGenotypeRecord(genotype); - - } else { - numNoCalls++; - } - sampleNumber++; - } double noCallProp = ( (double) numNoCalls )/( (double) sampleNames.size()); - double homNonRProp = ( (double) numHomNonrefCalls )/( (double) sampleNames.size() - numNoCalls); + double homNonRProp = ( (double) numHomVarCalls )/( (double) sampleNames.size() - numNoCalls); record.setQual(DEFAULT_QUALITY); String hw = hardyWeinbergCalculation(ref,record); double hwScore = hw != null ? Double.valueOf(hw) : -0.0; - record.addInfoFields(generateInfoField(record, numNoCalls,numHomNonrefCalls,numNonrefAlleles,ref, varInfo, hwScore)); + // TODO -- record.addInfoFields(generateInfoField(record, numNoCalls,numHomVarCalls,numNonrefAlleles,ref, plinkRod, hwScore)); if ( hwScore > maxHardy ) { record.setFilterString("Hardy-Weinberg"); } else if ( noCallProp > maxNoCall ) { @@ -174,7 +155,7 @@ public class PlinkToVCF extends RefWalker { } private Map generateInfoField(VCFRecord rec, int nocall, int homnonref, int allnonref, - ReferenceContext ref, SequenomVariantInfo info, double hwScore) { + ReferenceContext ref, PlinkRod info, double hwScore) { double propNoCall = ( ( double ) nocall / (double) nSamples ); double propHomNR = ( (double) homnonref / (double) nSamples ); HashMap infoMap = new HashMap(1); @@ -206,13 +187,13 @@ public class PlinkToVCF extends RefWalker { for ( String name : sampleNames ) { String pop = samplesToPopulation.get(name); if ( rec.getGenotype(name) != null ) { - genotypesByPopulation.get(pop).add(rec.getGenotype(name)); + // TODO -- genotypesByPopulation.get(pop).add(rec.getGenotype(name)); } } for ( String population : samplesToPopulation.values() ) { VCFVariationCall v = new VCFVariationCall(ref.getBase(),ref.getLocus(),VCFVariationCall.VARIANT_TYPE.SNP); - v.setGenotypeCalls(genotypesByPopulation.get(population)); + // TODO -- v.setGenotypeCalls(genotypesByPopulation.get(population)); hardyWeinbergByPopulation.put(population,HWCalc.annotate(null,ref,null,v)); } @@ -229,111 +210,6 @@ public class PlinkToVCF extends RefWalker { return String.format("%s",maxH); } - private void parseSequenomFile(File sequenomFile) { - BufferedReader reader; - try { - reader = new BufferedReader(new FileReader(sequenomFile)); - } catch ( IOException e ) { - throw new StingException("Sequenom file could not be opened",e); - } - String header; - try { - header = reader.readLine(); - //System.out.println("Read header line, it was "+header); - } catch (IOException e) { - throw new StingException(e.getMessage(),e); - } - HashMap sequenomVariants = parseSequenomHeader(header); - try { - String line; - do { - line = reader.readLine(); - //ystem.out.println("Read line, it was"+line); - if ( line != null ) { - //System.out.println("Parsing line..."); - parseSequenomLine(sequenomVariants,line); - } - } while ( line != null); - - reader.close(); - convertToLocusMap(sequenomVariants); - - } catch ( IOException e) { - throw new StingException("Error reading sequenom file", e); - } - } - - private HashMap parseSequenomHeader(String header) { - // the header will give us all the probe names and contigs - //System.out.println("Parsing header"); - HashMap variantsByFileOffset = new HashMap(); - String[] fields = header.split("\t"); - int fieldOffset = 0; - for ( String entry : fields ) { - if ( ! HEADER_FIELDS.contains(entry) ) { - //System.out.println(entry); - // actually a SNP - String snpName = entry.split("\\|")[1]; - //System.out.println("Entry: "+entry+" Name: "+snpName); - variantsByFileOffset.put(fieldOffset,new SequenomVariantInfo(snpName,nSamples)); - //System.out.println("Adding entry for offset "+fieldOffset); - } - fieldOffset++; - } - - return variantsByFileOffset; - } - - private void parseSequenomLine(HashMap variants, String line) { - if ( line == null ) { - // early return - //System.out.println("Line was null in file"); - return; - } - String[] entries = line.split("\t"); - int offset = 0; - if ( entries[1].equalsIgnoreCase("empty")) { - return; - } - sampleNames.add(entries[1]); - for ( String entry : entries ) { - if ( variants.containsKey(offset) ) { // actual SNP - variants.get(offset).addGenotype(entry); - //System.out.println("Added: "+entry+"To "+offset); - } - offset++; - } - } - - private void convertToLocusMap(HashMap variants) { - for ( SequenomVariantInfo variant : variants.values() ) { - //System.out.println("Variant name: "+variant.getName()); - String loc = genomeLocFromVariantName(variant.getName()); - //System.out.println(variant.getName()); - if ( loc == null ) { - throw new StingException("Genome locus was null"); - } - sequenomResults.put(loc,variant); - } - - variants.clear(); - variants = null; - } - - private String genomeLocFromVariantName(String name) { - String[] nameInfo = name.split("_"); - //System.out.println(name); - //System.out.println(nameInfo[0]); - String chr = nameInfo[0].substring(1); // format: c3 becomes 3 - String pos = nameInfo[1].substring(1); // format: p9961104 becomes 9961104 - if ( useb36contigs ) { - return chr+":"+pos; - } else { - return "chr"+chr+":"+pos; - } - - } - private HashMap parsePopulationFile(File file) { HashMap samplesToPopulation = new HashMap(); try { @@ -352,29 +228,4 @@ public class PlinkToVCF extends RefWalker { return samplesToPopulation; } - - -} - -class SequenomVariantInfo { - private ArrayList genotypes; - private String variantName; - - public SequenomVariantInfo(String name, int nPeople) { - genotypes = new ArrayList(nPeople); - variantName = name; - } - - public void addGenotype(String genotype) { - String[] alleles = genotype.split(" "); - genotypes.add(alleles[0]+alleles[1]); - } - - public String getName() { - return variantName; - } - - public ArrayList getGenotypes() { - return genotypes; - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 7ee3fefee..7b548ce15 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -186,7 +186,7 @@ public class VariantsToVCF extends RefWalker { if (dbsnp != null) info.put("DB","1"); if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1"); - return new VCFRecord(ref.getBase(), + return new VCFRecord(Character.toString(ref.getBase()), context.getContig(), (int) context.getPosition(), (dbsnp == null) ? "." : dbsnp.getRS_ID(), diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java index 4ee3584b7..7c5a316e7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java @@ -90,7 +90,7 @@ public class VCFSubsetWalker extends RodWalker, VCFWriter> } } - VCFRecord subset = new VCFRecord(record.getReferenceBase(), + VCFRecord subset = new VCFRecord(record.getReference(), record.getLocation().getContig(), (int) record.getLocation().getStart(), record.getID(), @@ -110,7 +110,7 @@ public class VCFSubsetWalker extends RodWalker, VCFWriter> boolean isVariant = false; for (VCFGenotypeEncoding ge : ((VCFGenotypeRecord)subset.getGenotypes().get(0)).getAlleles()) { - if (record.getReferenceBase() != ge.getBases().charAt(0)) { + if (!record.getReference().equals(ge.getBases())) { isVariant = true; } } diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java index 78f2875b0..2da54cd8f 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java @@ -73,7 +73,7 @@ class VCFCallRates extends CommandLineProgram record = reader.next(); } - char ref = record.getReferenceBase(); + char ref = record.getReference().charAt(0); String[] new_sample_names = record.getSampleNames(); if (new_sample_names.length != sample_names.length) { throw new RuntimeException(); } diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java index 5d8fd5b40..b15194bff 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java @@ -91,7 +91,7 @@ class VCFSequenomAnalysis extends CommandLineProgram continue; } - char ref = record1.getReferenceBase(); + char ref = record1.getReference().charAt(0); char alt = VCFTool.getAlt(record2); int n_total_sequenom = VCFTool.Compute_n_total(record1); diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java index c91b96085..d643eecdc 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java @@ -95,7 +95,7 @@ class VCFSequenomAnalysis2 extends CommandLineProgram String[] sample_names = record2.getSampleNames(); - char ref = record1.getReferenceBase(); + char ref = record1.getReference().charAt(0); char alt = VCFTool.getAlt(record2); int n_total_sequenom = VCFTool.Compute_n_total(record1, sample_names); @@ -211,8 +211,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } - if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } + if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } + if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } } if (n_alt != 1) { throw new RuntimeException(); } if (singleton_name.equals("")) { throw new RuntimeException(); } @@ -234,8 +234,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } - if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } + if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } + if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; } break; } } @@ -263,8 +263,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } if (n_alt == 1) { het_samples.add(sample_names[i]); } } @@ -291,8 +291,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram Arrays.sort(c); g = new String(c); if (g.equals("..")) { dropped_hets += 1; continue; } - if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } if (n_alt == 1) { matched_hets += 1; } else { mismatched_hets += 1; } } diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java index 1ba3688a4..80e8161e9 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java @@ -263,7 +263,7 @@ class CheckRefFields extends CommandLineProgram } long offset = record.getLocation().getStart(); - char vcf_ref_base = record.getReferenceBase(); + char vcf_ref_base = record.getReference().charAt(0); char fasta_ref_base = (char)ref_seq[(int)offset-1]; List alleles = record.getAlternateAlleles(); @@ -338,7 +338,7 @@ class FixRefFields extends CommandLineProgram } long offset = record.getLocation().getStart(); - char vcf_ref_base = record.getReferenceBase(); + char vcf_ref_base = record.getReference().charAt(0); char fasta_ref_base = (char)ref_seq[(int)offset-1]; List alleles = record.getAlternateAlleles(); @@ -524,7 +524,7 @@ class PrintGQ extends CommandLineProgram record = reader.next(); } - char ref = record.getReferenceBase(); + char ref = record.getReference().charAt(0); String[] sample_names = record.getSampleNames(); @@ -617,7 +617,7 @@ class VCFSimpleStats extends CommandLineProgram record1 = reader1.next(); } - char ref = record1.getReferenceBase(); + char ref = record1.getReference().charAt(0); String[] sample_names1 = record1.getSampleNames(); @@ -845,7 +845,7 @@ class VCFConcordance extends CommandLineProgram } - char ref = record1.getReferenceBase(); + char ref = record1.getReference().charAt(0); String[] sample_names1 = record1.getSampleNames(); String[] sample_names2 = record2.getSampleNames(); @@ -1260,7 +1260,7 @@ public class VCFTool public static boolean isTransition(VCFRecord record) { - char ref = record.getReferenceBase(); + char ref = record.getReference().charAt(0); List alleles = record.getAlternateAlleles(); char alt = alleles.get(0).getBases().charAt(0); @@ -1315,8 +1315,8 @@ public class VCFTool Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } } return n_alt + n_ref; } @@ -1359,8 +1359,8 @@ public class VCFTool Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } } return n_alt; } @@ -1406,8 +1406,8 @@ public class VCFTool Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } if (n_alt == 1) { n_het += 1; } } return n_het; @@ -1476,8 +1476,8 @@ public class VCFTool Arrays.sort(c); g = new String(c); if (g.equals("..")) { continue; } - if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } - if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } + if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; } if (n_ref == 2) { ref += 1; } else if (n_ref == 1 && n_alt == 1) { het += 1; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java index c81f4c33b..0fe7edcba 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java @@ -15,8 +15,8 @@ public abstract class AlleleConstrainedGenotype implements Genotype { private char ref = NO_CONSTRAINT; private char alt = NO_CONSTRAINT; - public AlleleConstrainedGenotype(char ref) { - this.ref = ref; + public AlleleConstrainedGenotype(String ref) { + this.ref = ref.charAt(0); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java index cfe7f94b6..b479b7b21 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -24,8 +24,8 @@ public class BasicGenotype implements Genotype { // our location private GenomeLoc mLocation; - // the reference base. - private char mRef; + // the reference bases + private String mRef; // the confidence score private double mNegLog10PError; @@ -38,7 +38,7 @@ public class BasicGenotype implements Genotype { * @param ref the reference base as a char * @param negLog10PError the confidence score */ - public BasicGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError) { + public BasicGenotype(GenomeLoc location, String genotype, String ref, double negLog10PError) { mNegLog10PError = negLog10PError; for ( char base : genotype.toCharArray() ) { @@ -56,7 +56,6 @@ public class BasicGenotype implements Genotype { * * @return the negitive log based error estimate */ - @Override public double getNegLog10PError() { return mNegLog10PError; } @@ -66,7 +65,6 @@ public class BasicGenotype implements Genotype { * * @return the bases, as a string */ - @Override public String getBases() { return mGenotype; } @@ -76,7 +74,6 @@ public class BasicGenotype implements Genotype { * * @return the ploidy value */ - @Override public int getPloidy() { return mGenotype.length(); } @@ -86,7 +83,6 @@ public class BasicGenotype implements Genotype { * * @return true if we're homozygous, false otherwise */ - @Override public boolean isHom() { if (mGenotype.length() < 1) return false; @@ -106,7 +102,6 @@ public class BasicGenotype implements Genotype { * * @return true if we're het, false otherwise */ - @Override public boolean isHet() { if (mGenotype.length() < 1) return false; @@ -118,7 +113,6 @@ public class BasicGenotype implements Genotype { * * @return a GenomeLoc representing the location */ - @Override public GenomeLoc getLocation() { return mLocation; } @@ -128,7 +122,6 @@ public class BasicGenotype implements Genotype { * * @return true is a SNP */ - @Override public boolean isPointGenotype() { return true; } @@ -140,7 +133,6 @@ public class BasicGenotype implements Genotype { * * @return true if we're a variant */ - @Override public boolean isVariant(char ref) { return !(mGenotype.charAt(0) == ref && isHom()); } @@ -150,8 +142,7 @@ public class BasicGenotype implements Genotype { * * @return a character, representing the reference base */ - @Override - public char getReference() { + public String getReference() { return mRef; } @@ -160,7 +151,6 @@ public class BasicGenotype implements Genotype { * * @return the variant */ - @Override public Variation toVariation(char ref) { if (!isVariant(ref)) throw new IllegalStateException("this genotype is not a variant"); return new BasicVariation(this.getBases(), String.valueOf(ref), this.getBases().length(), mLocation, mNegLog10PError); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index d48a8e31f..b9b7f534e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -79,7 +79,7 @@ public interface Genotype { * get the reference base. * @return a character, representing the reference base */ - public char getReference(); + public String getReference(); /** * return this genotype as a variant diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 5b1c9c440..cab68584d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -85,7 +85,7 @@ public class GenotypeWriterFactory { * @param loc the location * @return an unpopulated genotype call object */ - public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { + public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, String ref, GenomeLoc loc) { switch (format) { case VCF: return new VCFGenotypeCall(ref, loc); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 754d81b84..4bfa9a040 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -66,7 +66,6 @@ public class GeliAdapter implements GeliGenotypeWriter { * * @param fileHeader the file header to write out */ - @Override public void writeHeader(final SAMFileHeader fileHeader) { this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); } @@ -131,7 +130,7 @@ public class GeliAdapter implements GeliGenotypeWriter { throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); GeliGenotypeCall gCall = (GeliGenotypeCall)call; - char ref = gCall.getReference(); + char ref = gCall.getReference().charAt(0); int readCount = gCall.getReadCount(); double maxMappingQual = 0; if ( gCall.getPileup() != null ) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index 1851e73c5..7693616c1 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -34,9 +34,9 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot * @param ref the ref character * @param loc the genome loc */ - public GeliGenotypeCall(char ref, GenomeLoc loc) { + public GeliGenotypeCall(String ref, GenomeLoc loc) { super(ref); - mRefBase = ref; + mRefBase = ref.charAt(0); mLocation = loc; // fill in empty data @@ -44,12 +44,12 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot Arrays.fill(mPosteriors, Double.MIN_VALUE); } - public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) { + public GeliGenotypeCall(String ref, GenomeLoc loc, String genotype, double negLog10PError) { super(ref); - mRefBase = ref; + mRefBase = ref.charAt(0); mLocation = loc; mBestGenotype = DiploidGenotype.valueOf(genotype); - mRefGenotype = DiploidGenotype.createHomGenotype(ref); + mRefGenotype = DiploidGenotype.createHomGenotype(mRefBase); mNextGenotype = mRefGenotype; // set general posteriors to min double value @@ -114,7 +114,7 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot private void lazyEval() { if (mBestGenotype == null) { - char ref = this.getReference(); + char ref = this.getReference().charAt(0); char alt = this.getAlternateAllele(); mRefGenotype = DiploidGenotype.createHomGenotype(ref); @@ -285,8 +285,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot * * @return the reference character */ - public char getReference() { - return mRefBase; + public String getReference() { + return Character.toString(mRefBase); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 6fe2ed3ce..6399f5b78 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -65,7 +65,7 @@ public class GeliTextWriter implements GeliGenotypeWriter { throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); GeliGenotypeCall gCall = (GeliGenotypeCall)call; - char ref = gCall.getReference(); + char ref = gCall.getReference().charAt(0); double[] posteriors = gCall.getPosteriors(); double[] lks; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index 043160302..c3473a0ae 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -33,9 +33,9 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac * @param ref the ref character * @param loc the genome loc */ - public GLFGenotypeCall(char ref, GenomeLoc loc) { - mRefBase = ref; - mGenotype = Utils.dupString(ref, 2); + public GLFGenotypeCall(String ref, GenomeLoc loc) { + mRefBase = ref.charAt(0); + mGenotype = Utils.dupString(mRefBase, 2); // fill in empty data mLocation = loc; @@ -192,8 +192,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac * * @return the reference character */ - public char getReference() { - return mRefBase; + public String getReference() { + return Character.toString(mRefBase); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 60a94315f..cef5f6ff6 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -149,7 +149,7 @@ public class GLFWriter implements GLFGenotypeWriter { throw new IllegalArgumentException("Only GLFGenotypeCall should be passed in to the GLF writers"); GLFGenotypeCall gCall = (GLFGenotypeCall) call; - char ref = gCall.getReference(); + char ref = gCall.getReference().charAt(0); // get likelihood information if available LikelihoodObject obj = new LikelihoodObject(gCall.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index 17199dd08..77ac02b0b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -43,7 +43,6 @@ public class TabularLFWriter implements GenotypeWriter { * * @param locus the locus to add */ - @Override public void addGenotypeCall(Genotype locus) { double likelihoods[]; int readDepth = -1; @@ -56,7 +55,7 @@ public class TabularLFWriter implements GenotypeWriter { likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); } - char ref = locus.getReference(); + char ref = locus.getReference().charAt(0); if (locus instanceof ReadBacked) { readDepth = ((ReadBacked)locus).getReadCount(); @@ -84,13 +83,11 @@ public class TabularLFWriter implements GenotypeWriter { * * @param position */ - @Override public void addNoCall(int position) { throw new StingException("TabularLFWriter doesn't support no-calls"); } /** finish writing, closing any open files. */ - @Override public void close() { if (this.outStream != null) { outStream.close(); @@ -103,13 +100,11 @@ public class TabularLFWriter implements GenotypeWriter { * * @param genotypes the list of genotypes, that are backed by sample information */ - @Override public void addMultiSampleCall(List genotypes, VariationCall metadata) { throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls"); } /** @return true if we support multisample, false otherwise */ - @Override public boolean supportsMultiSample() { return false; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index 226766ba1..2173c715d 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.*; * The implementation of the genotype interface, specific to VCF */ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, SampleBacked, Cloneable { - private final char mRefBase; + private final String mRef; private final GenomeLoc mLocation; private ReadBackedPileup mPileup = null; @@ -28,19 +28,19 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty // the sample name, used to propulate the SampleBacked interface private String mSampleName; - public VCFGenotypeCall(char ref, GenomeLoc loc) { + public VCFGenotypeCall(String ref, GenomeLoc loc) { super(ref); - mRefBase = ref; + mRef = ref; mLocation = loc; // fill in empty data - mGenotype = DiploidGenotype.createHomGenotype(ref); + mGenotype = DiploidGenotype.createHomGenotype(ref.charAt(0)); mSampleName = ""; } - public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) { + public VCFGenotypeCall(String ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) { super(ref); - mRefBase = ref; + mRef = ref; mLocation = loc; mGenotype = genotype; mNegLog10PError = negLog10PError; @@ -80,12 +80,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty return mGenotype.equals(otherCall.mGenotype) && mNegLog10PError == otherCall.mNegLog10PError && mLocation.equals(otherCall.mLocation) && - mRefBase == otherCall.mRefBase; + mRef.equals(otherCall.mRef); } public String toString() { return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f", - getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError()); + getLocation(), mGenotype, mRef, getReadCount(), getNegLog10PError()); } /** @@ -168,7 +168,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty * @return true if we're a variant */ public boolean isVariant() { - return isVariant(mRefBase); + return isVariant(mRef.charAt(0)); } /** @@ -209,12 +209,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty } /** - * get the reference character + * get the reference string * - * @return the reference character + * @return the reference string */ - public char getReference() { - return mRefBase; + public String getReference() { + return mRef; } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index f96988b5b..5c23e37ea 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -139,8 +139,8 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { return mRecord != null ? mRecord.getLocation() : null; } - public char getReference() { - return mRecord != null ? mRecord.getReferenceBase() : 'N'; + public String getReference() { + return mRecord != null ? mRecord.getReference() : "N"; } public Variation toVariation(char ref) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 491c084b3..3c52b1d64 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -102,7 +102,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { if ( locusdata == null ) throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have variation data"); - params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0)); + params.setLocations(locusdata.getLocation(), locusdata.getReference()); // if there is no genotype data, we'll also need to set an alternate allele if ( locusdata.isBiallelic() && locusdata.isSNP() ) @@ -161,7 +161,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { if ( locusdata != null ) dbSnpID = ((VCFVariationCall)locusdata).getID(); - VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), + VCFRecord vcfRecord = new VCFRecord(params.getReferenceBases(), params.getContig(), params.getPosition(), (dbSnpID == null ? VCFRecord.EMPTY_ID_FIELD : dbSnpID), diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index db644039c..ce9c5a4f4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -12,7 +12,7 @@ import java.util.ArrayList; * we feed to the VCF (like ensuring the same position for each genotype in a call). */ class VCFParameters { - private char referenceBase = '0'; + private String referenceBases = "0"; private int position = 0; private String contig = null; private boolean initialized = false; @@ -21,7 +21,7 @@ class VCFParameters { private List alternateBases = new ArrayList(); private List alleleCounts = new ArrayList(); - public void setLocations(GenomeLoc location, char refBase) { + public void setLocations(GenomeLoc location, String refBases) { // if we haven't set it up, we initialize the object if (!initialized) { initialized = true; @@ -30,14 +30,14 @@ class VCFParameters { if (location.getStart() != location.getStop()) { throw new IllegalArgumentException("The start and stop locations must be the same"); } - this.referenceBase = refBase; + this.referenceBases = refBases; } else { if (!contig.equals(this.contig)) throw new IllegalArgumentException("The contig name has to be the same at a single locus"); if (location.getStart() != this.position) throw new IllegalArgumentException("The position has to be the same at a single locus"); - if (refBase != this.referenceBase) - throw new IllegalArgumentException("The reference base name has to be the same at a single locus"); + if (refBases != this.referenceBases) + throw new IllegalArgumentException("The reference has to be the same at a single locus"); } } @@ -52,8 +52,8 @@ class VCFParameters { } /** @return get the reference base */ - public char getReferenceBase() { - return referenceBase; + public String getReferenceBases() { + return referenceBases; } public void addGenotypeRecord(VCFGenotypeRecord record) { @@ -72,7 +72,7 @@ class VCFParameters { public void addAlternateBase(VCFGenotypeEncoding base) { if ( !alternateBases.contains(base) && - !base.toString().equals(String.valueOf(getReferenceBase()).toUpperCase()) && + !base.toString().equals(String.valueOf(getReferenceBases()).toUpperCase()) && !base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) { alternateBases.add(base); alleleCounts.add(0); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 73dbcc31c..6aae27928 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -41,7 +41,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; // the reference base - private char mReferenceBase; + private String mReferenceBases; // our location private GenomeLoc mLoc; // our id @@ -55,7 +55,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { // our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests) private final Map mInfoFields = new TreeMap(); - private final String mGenotypeFormatString; + private String mGenotypeFormatString; // our genotype sample fields private final List mGenotypeRecords = new ArrayList(); @@ -63,12 +63,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { /** * given a reference base, a location, and the format string, create a VCF record. * - * @param referenceBase the reference base to use + * @param referenceBases the reference bases to use * @param location our genomic location * @param genotypeFormatString the format string */ - public VCFRecord(char referenceBase, GenomeLoc location, String genotypeFormatString) { - setReferenceBase(referenceBase); + public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) { + setReferenceBase(referenceBases); setLocation(location); mGenotypeFormatString = genotypeFormatString; } @@ -99,7 +99,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { /** * create a VCF record * - * @param referenceBase the reference base to use + * @param referenceBases the reference bases to use * @param contig the contig this variant is on * @param position our position * @param ID our ID string @@ -110,7 +110,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { * @param genotypeFormatString the format string * @param genotypeObjects the genotype objects */ - public VCFRecord(char referenceBase, + public VCFRecord(String referenceBases, String contig, long position, String ID, @@ -120,7 +120,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { Map infoFields, String genotypeFormatString, List genotypeObjects) { - setReferenceBase(referenceBase); + setReferenceBase(referenceBases); setLocation(contig, position); this.mID = ID; for (VCFGenotypeEncoding alt : altBases) @@ -155,7 +155,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { case REF: if (columnValues.get(val).length() != 1) throw new IllegalArgumentException("Reference base should be a single character"); - setReferenceBase(columnValues.get(val).charAt(0)); + setReferenceBase(columnValues.get(val)); break; case ALT: String values[] = columnValues.get(val).split(","); @@ -215,16 +215,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { * @return either A, T, C, G, or N */ public String getReference() { - return Character.toString(mReferenceBase); - } - - /** - * get the reference base - * - * @return either A, T, C, G, or N - */ - public char getReferenceBase() { - return mReferenceBase; + return mReferenceBases; } /** @@ -333,7 +324,6 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { } public boolean isInHapmap() { - boolean inHapmap; if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) { return true; } else { @@ -350,7 +340,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { public char getReferenceForSNP() { if ( !isSNP() ) throw new IllegalStateException("This record does not represent a SNP"); - return getReferenceBase(); + return getReference().charAt(0); } /** @@ -460,11 +450,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { return mGenotypeFormatString; }// the formatting string for our genotype records - public void setReferenceBase(char referenceBase) { - referenceBase = Character.toUpperCase(referenceBase); - if (referenceBase != 'A' && referenceBase != 'C' && referenceBase != 'T' && referenceBase != 'G' && referenceBase != 'N') - throw new IllegalArgumentException("Reference base must be one of A,C,G,T,N, we saw: " + referenceBase); - mReferenceBase = referenceBase; + public void setGenotypeFormatString(String newFormatString) { + mGenotypeFormatString = newFormatString; + } + + public void setReferenceBase(String reference) { + mReferenceBases = reference.toUpperCase(); } public void setLocation(String chrom, long position) { @@ -578,7 +569,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { builder.append(FIELD_SEPERATOR); builder.append(getID()); builder.append(FIELD_SEPERATOR); - builder.append(getReferenceBase()); + builder.append(getReference()); builder.append(FIELD_SEPERATOR); List alts = getAlternateAlleles(); if ( alts.size() > 0 ) { @@ -667,7 +658,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { */ public boolean equals(VCFRecord other) { if (!this.mAlts.equals(other.mAlts)) return false; - if (this.mReferenceBase != other.mReferenceBase) return false; + if (!this.mReferenceBases.equals(other.mReferenceBases)) return false; if (!this.mLoc.equals(other.mLoc)) return false; if (!this.mID.equals(other.mID)) return false; if (this.mQual != other.mQual) return false; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 0ef038ba6..f17675a73 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -124,7 +124,7 @@ public class VCFUtils { if ( freqsSeen > 0 ) infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen))); - return new VCFRecord(params.getReferenceBase(), + return new VCFRecord(params.getReferenceBases(), params.getContig(), params.getPosition(), (id != null ? id : "."), diff --git a/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java b/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java index 374e5ee51..6ec5548b5 100644 --- a/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java @@ -14,7 +14,7 @@ import java.io.File; * @version 0.1 */ public class AlignerIntegrationTest extends WalkerTest { - @Test + //@Test public void testBasicAlignment() { String md5 = "c6d95d8ae707e78fefdaa7375f130995"; WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java index cebeaf484..ac5832eec 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/ConcordanceTruthTableTest.java @@ -26,13 +26,13 @@ public class ConcordanceTruthTableTest extends BaseTest { ConcordanceTruthTable ctt = new ConcordanceTruthTable(3); // this will test the count of non-reference alleles at a T/G polymorphic site - Genotype ref1 = new BasicGenotype(null,"GG",'G',30); - Genotype ref2 = new BasicGenotype(null,"GG",'G',30); - Genotype ref3 = new BasicGenotype(null,"GG",'G',30); - Genotype het1 = new BasicGenotype(null,"GT",'G',32); - Genotype het2 = new BasicGenotype(null,"GT",'G',28); - Genotype hom1 = new BasicGenotype(null,"TT",'G',40); - Genotype hom2 = new BasicGenotype(null,"TT",'G',27); + Genotype ref1 = new BasicGenotype(null,"GG","G",30); + Genotype ref2 = new BasicGenotype(null,"GG","G",30); + Genotype ref3 = new BasicGenotype(null,"GG","G",30); + Genotype het1 = new BasicGenotype(null,"GT","G",32); + Genotype het2 = new BasicGenotype(null,"GT","G",28); + Genotype hom1 = new BasicGenotype(null,"TT","G",40); + Genotype hom2 = new BasicGenotype(null,"TT","G",27); List> oneHom = new ArrayList>(4); oneHom.add(new Pair(ref1,null)); diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/SequenomToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/SequenomToVCFIntegrationTest.java index fe01586e8..d0a5af87f 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/SequenomToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/SequenomToVCFIntegrationTest.java @@ -15,12 +15,12 @@ import java.util.List; * To change this template use File | Settings | File Templates. */ public class SequenomToVCFIntegrationTest extends WalkerTest { - @Test + //@Test public void testSequenomToVCFWithoutSmartHardy() { String testMD5 = "a16ce402ce4492c8c0552073c0a8bdf5"; String testPedFile = "/humgen/gsa-hpprojects/GATK/data/Validation_Data/Sequenom_Test_File.txt"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -L 1:1000000-2000000 "+ - "-T PlinkToVCF -b36contig -ns 10 -sPed "+testPedFile+" -vcf %s"; + "-T PlinkToVCF -b36contig -ns 10 -B input,Plink,"+testPedFile+" -vcf %s"; WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs,1, Arrays.asList(testMD5)); List result = executeTest("TestSequenomToVCFNoSmartHardy",spec).getFirst(); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java b/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java index c80ac3cc7..4755800a7 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/BasicGenotypeTest.java @@ -36,27 +36,27 @@ public class BasicGenotypeTest extends BaseTest { @Test public void testBasicGenotypeIsHom() { - BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0); Assert.assertTrue(gt.isHom()); - BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0); Assert.assertTrue(!gt2.isHom()); } @Test public void testBasicGenotypeIsHet() { - BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0); Assert.assertTrue(!gt.isHet()); - BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0); Assert.assertTrue(gt2.isHet()); } @Test public void testBasicGenotypeIsVariant() { - BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0); + BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0); Assert.assertTrue(!gt.isVariant('A')); - BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0); + BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0); Assert.assertTrue(gt2.isVariant('A')); - BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0); + BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT","A",0); Assert.assertTrue(gt3.isVariant('A')); } } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index 64ffb919c..3b7870b2c 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -193,7 +193,7 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable { * @param negLog10PError the confidence score */ public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) { - super(ref, location); + super(Character.toString(ref), location); setLikelihoods(likelihoods); setGenotype(genotype); setNegLog10PError(negLog10PError); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index 751ecfc81..521a1c187 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -290,8 +290,8 @@ public class VCFReaderTest extends BaseTest { grecords = rec.getVCFGenotypeRecords(); for ( VCFGenotypeRecord grec : grecords ) { if ( !grec.isEmptyGenotype() ) { - Assert.assertTrue(grec.isVariant(rec.getReferenceBase())); - Assert.assertEquals(rec, grec.toVariation(rec.getReferenceBase())); + Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0))); + Assert.assertEquals(rec, grec.toVariation(rec.getReference().charAt(0))); } } @@ -300,7 +300,7 @@ public class VCFReaderTest extends BaseTest { rec = reader.next(); grecords = rec.getVCFGenotypeRecords(); for ( VCFGenotypeRecord grec : grecords ) { - if ( !grec.isVariant(rec.getReferenceBase()) ) { + if ( !grec.isVariant(rec.getReference().charAt(0)) ) { Assert.assertTrue(grec.isHom()); Assert.assertTrue(grec.getFields().get("GQ").equals("-1")); Assert.assertEquals(-1, grec.getReadCount()); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java index ddeb0503b..887c34d93 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java @@ -44,7 +44,7 @@ public class VCFRecordTest extends BaseTest { altBases.add(new VCFGenotypeEncoding("D1")); List genotypeObjects = new ArrayList(); genotypeObjects.add(createGenotype("sample1", "A", "A")); - return new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects); + return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects); } /** @@ -72,7 +72,7 @@ public class VCFRecordTest extends BaseTest { altBases.add(new VCFGenotypeEncoding("D1")); List genotypeObjects = new ArrayList(); genotypeObjects.add(createGenotype("sample1", "A", "A")); - VCFRecord rec = new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap(), "GT:AA", genotypeObjects); + VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap(), "GT:AA", genotypeObjects); Assert.assertEquals(2, rec.getAlternateAlleles().size()); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index b40a4d930..c4eb49204 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -92,7 +92,7 @@ public class VCFWriterTest extends BaseTest { rec.setField("bb", "0"); gt.add(rec); } - return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt); + return new VCFRecord("A","chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt); }