1. To make indel calls, we need to get rid of the SNP-centricity of our code. First step is to have the reference be a String, not a char in the Genotype. Note that this is just a temporary patch until the genotype code is ported over to use VariantContext.

2. Significant refactoring of Plink code to work in the rods and use VariantContext.  More coming.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2913 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-02 20:26:40 +00:00
parent 6ceae22793
commit 5f3c80d9aa
41 changed files with 333 additions and 642 deletions

View File

@ -1,105 +1,124 @@
package org.broadinstitute.sting.gatk.refdata;
/*
import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import java.io.*;
import java.util.*;
import net.sf.samtools.SAMFileHeader;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 19, 2010
* Time: 10:24:18 AM
* To change this template use File | Settings | File Templates.
* User: chartl, ebanks
*
public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
*/
public class PlinkRod extends BasicReferenceOrderedDatum implements Iterator<PlinkRod> {
public static final String SEQUENOM_NO_CALL = "0";
public static final String SEQUENOM_NO_VARIANT = "-";
private final Set<String> headerEntries = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex",
"Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
private final byte SNP_MAJOR_MODE = 1;
private ArrayList<PlinkVariantInfo> variants;
private PlinkVariantInfo currentVariant;
private ListIterator<PlinkVariantInfo> variantIterator;
private PlinkFileType plinkFileType;
public enum PlinkFileType {
STANDARD_PED,RAW_PED,BINARY_PED
STANDARD_PED, RAW_PED, BINARY_PED
}
// CONSTRUCTOR
public PlinkRod(String name) {
super(name);
}
public PlinkRod(String name, PlinkVariantInfo record, ListIterator<PlinkVariantInfo> iter) {
super(name);
currentVariant = record;
variantIterator = iter;
}
@Override
public Object initialize(final File plinkFile) throws FileNotFoundException {
if ( ! plinkFile.exists() ) {
throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist.");
}
variants = parsePlinkFile(plinkFile);
if ( variants != null ) {
variantIterator = variants.listIterator();
currentVariant = variantIterator.next();
}
assertNotNull();
ArrayList<PlinkVariantInfo> variants = parsePlinkFile(plinkFile);
if ( variants == null )
throw new IllegalStateException("Error parsing Plink file");
variantIterator = variants.listIterator();
return null;
}
public static PlinkRod createIterator(String name, File file) {
PlinkRod plink = new PlinkRod(name);
try {
plink.initialize(file);
} catch (FileNotFoundException e) {
throw new StingException("Unable to find file " + file);
}
return plink;
}
private void assertNotNull() {
if ( currentVariant == null ) {
throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" );
throw new IllegalStateException("Current variant information is null");
}
}
public boolean hasNext() {
return variantIterator.hasNext();
}
public PlinkRod next() {
if ( !this.hasNext() )
throw new NoSuchElementException("PlinkRod next called on iterator with no more elements");
// get the next record
currentVariant = variantIterator.next();
return new PlinkRod(name, currentVariant, variantIterator);
}
@Override
public boolean parseLine(Object obj, String[] args) {
if ( variantIterator.hasNext() ) {
currentVariant = variantIterator.next();
return true;
} else {
return false;
}
throw new UnsupportedOperationException("PlinkRod does not support the parseLine method");
}
public void remove() {
throw new UnsupportedOperationException("The remove operation is not supported for a PlinkRod");
}
@Override
public GenomeLoc getLocation() {
assertNotNull();
return currentVariant.getLocation();
}
@Override
public String toString() {
return currentVariant == null ? "" : currentVariant.toString();
assertNotNull();
return currentVariant.toString();
}
public String getVariantName() {
assertNotNull();
return currentVariant.getName();
}
public ArrayList<String> getVariantSampleNames() {
return currentVariant.getSampleNames();
public VariantContext getVariantContext() {
assertNotNull();
return currentVariant.getVariantContext();
}
public ArrayList<Genotype> getGenotypes() {
return currentVariant.getGenotypes();
}
public boolean variantIsSNP() {
return currentVariant.isSNP();
public boolean isIndel() {
assertNotNull();
return currentVariant.isIndel();
}
@ -123,7 +142,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
/* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***
* * PARSING STANDARD TEXT PED FILES * **
* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/
/*
private ArrayList<PlinkVariantInfo> parseTextFormattedPlinkFile( File file ) {
try {
BufferedReader reader = new BufferedReader( new FileReader ( file ) );
@ -131,7 +150,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
ArrayList<PlinkVariantInfo> seqVars = instantiateVariantListFromHeader(header);
ArrayList<Integer> snpOffsets = getSNPOffsetsFromHeader(header);
String line = null;
String line;
do {
line = reader.readLine();
incorporateInfo(seqVars,snpOffsets,line);
@ -156,40 +175,29 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
}
String[] plinkInfo;
if ( plinkFileType == PlinkFileType.STANDARD_PED) {
plinkInfo = plinkLine.split("\t");
} else {
if ( plinkFileType != PlinkFileType.STANDARD_PED )
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
}
plinkInfo = plinkLine.split("\t");
String individualName = plinkInfo[1];
int snpNumber = 0;
if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
for ( int i : offsets ) {
vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName);
snpNumber++;
}
for ( int i : offsets ) {
vars.get(snpNumber).addGenotypeEntry(plinkInfo[i].split("\\s+"), individualName);
snpNumber++;
}
}
private ArrayList<PlinkVariantInfo> instantiateVariantListFromHeader(String header) {
if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing
plinkFileType = PlinkFileType.STANDARD_PED;
} else {// first line is the raw header; comes from de-binary-ing a .bed file
plinkFileType = PlinkFileType.RAW_PED;
// if the first line is not a comment (what we're used to seeing),
// then it's the raw header (comes from de-binary-ing a .bed file)
if ( !header.startsWith("#") )
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
}
plinkFileType = PlinkFileType.STANDARD_PED;
ArrayList<PlinkVariantInfo> seqVars = new ArrayList<PlinkVariantInfo>();
String[] headerFields;
if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
headerFields = header.split("\t");
} else {
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
}
String[] headerFields = header.split("\t");
for ( String field : headerFields ) {
if ( ! headerEntries.contains(field) ) {
@ -225,7 +233,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
/* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
* * PARSING BINARY PLINK BED/BIM/FAM FILES * *
* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/
/*
private ArrayList<PlinkVariantInfo> parseBinaryFormattedPlinkFile(File file) {
PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file);
ArrayList<PlinkVariantInfo> parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile);
@ -269,17 +277,16 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
ArrayList<PlinkVariantInfo> variants = new ArrayList<PlinkVariantInfo>();
try {
String line = null;
do {
String line = reader.readLine();
while ( line != null ) {
String[] snpInfo = line.split("\\s+");
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1]);
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
variant.setAlleles(snpInfo[4],snpInfo[5]);
variants.add(variant);
line = reader.readLine();
if ( line != null ) {
String[] snpInfo = line.split("\\s+");
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true);
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
variant.setAlleles(snpInfo[4],snpInfo[5]);
variants.add(variant);
}
} while ( line != null );
}
} catch ( IOException e ) {
throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e);
}
@ -300,7 +307,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
ArrayList<String> sampleNames = new ArrayList<String>();
try {
String line = null;
String line;
do {
line = reader.readLine();
if ( line != null ) {
@ -325,7 +332,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
}
try {
byte genotype = -1;
byte genotype;
long bytesRead = 0;
int snpOffset = 0;
int sampleOffset = 0;
@ -397,25 +404,26 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
class PlinkVariantInfo implements Comparable {
private enum AlleleType {
INSERTION,DELETION
}
private String variantName;
private GenomeLoc loc;
private ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
private ArrayList<String> sampleNames = new ArrayList<String>();
private HashSet<Allele> alleles = new HashSet<Allele>();
private HashSet<Genotype> genotypes = new HashSet<Genotype>();
private ArrayList<Allele> indelHolder;
private ArrayList<String> sampleHolder;
private int siteIndelLength;
private AlleleType indelType;
// for indels
private boolean isIndel = false;
private boolean isInsertion = false;
private int indelLength = 0;
// for binary parsing
private String locAllele1;
private String locAllele2;
public PlinkVariantInfo(String variantName) {
this.variantName = variantName;
parseName();
}
public GenomeLoc getLocation() {
return loc;
}
@ -424,79 +432,82 @@ class PlinkVariantInfo implements Comparable {
return variantName;
}
public ArrayList<String> getSampleNames() {
return sampleNames;
public VariantContext getVariantContext() {
try {
return new VariantContext(variantName, loc, alleles, genotypes);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
}
}
public ArrayList<Genotype> getGenotypes() {
return genotypes;
}
public boolean isSNP() {
return this.indelType == null;
public boolean isIndel() {
return isIndel;
}
public void setGenomeLoc(GenomeLoc loc) {
this.loc = loc;
}
private void parseName() {
int chromIdx = variantName.indexOf("|c");
if ( chromIdx == -1 )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c...)");
String[] pieces = variantName.substring(chromIdx+2).split("_");
if ( pieces.length < 2 )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
String chrom = pieces[0];
if ( pieces[1].charAt(0) != 'p' )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
String pos = pieces[1].substring(1);
loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
if ( pieces.length > 2 && (pieces[2].equals("gI") || pieces[2].equals("gD")) ) {
// it's an indel
isIndel = true;
isInsertion = pieces[2].equals("gI");
}
}
public void setAlleles(String al1, String al2) {
if ( al1.equals("0") ) {
if ( al1.equals(PlinkRod.SEQUENOM_NO_CALL) ) {
// encoding for a site at which no variants were detected
locAllele1 = al2;
} else {
locAllele1 = al1;
}
locAllele2 = al2;
if ( ! isSNP() ) {
siteIndelLength = Math.max(locAllele1.length(),locAllele2.length());
}
public void addGenotypeEntry(String[] alleleStrings, String sampleName) {
ArrayList<Allele> myAlleles = new ArrayList<Allele>(2);
for ( String alleleString : alleleStrings ) {
if ( alleleString.equals(PlinkRod.SEQUENOM_NO_CALL) ) {
myAlleles.add(Allele.NO_CALL);
} else {
Allele allele;
if ( !isIndel ) {
allele = new Allele(alleleString);
} else if ( alleleString.equals(PlinkRod.SEQUENOM_NO_VARIANT) ) {
allele = new Allele(alleleString, isInsertion);
} else {
allele = new Allele(alleleString, !isInsertion);
if ( indelLength == 0 ) {
indelLength = alleleString.length();
loc = GenomeLocParser.parseGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+indelLength-1);
}
}
myAlleles.add(allele);
if ( alleles.size() < 2 )
alleles.add(allele);
}
}
}
// CONSTRUCTOR
public PlinkVariantInfo(String variantName) {
this.variantName = variantName;
parseName();
}
public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) {
this.variantName = variantName;
if ( onlyLookForIndelInfo ) {
parseNameForIndels();
} else {
parseName();
}
}
private void parseName() {
String chrom = this.variantName.split("\\|c")[1].split("_")[0];
String pos = this.variantName.split("_p")[1].split("_")[0];
this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
this.parseNameForIndels();
}
private void parseNameForIndels() {
if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) {
this.instantiateIndel();
}
}
private void instantiateIndel() {
this.indelHolder = new ArrayList<Allele>();
this.sampleHolder = new ArrayList<String>();
this.siteIndelLength = -1;
this.indelType = this.variantName.contains("_gI") ? AlleleType.INSERTION : AlleleType.DELETION;
}
public void addGenotypeEntry(String genotypeString, String sampleName) {
// identify if we're dealing with a deletion
if ( this.isSNP() ) {
this.addSNP(genotypeString.split("\\s+"),sampleName);
} else {
this.addIndel(genotypeString.split("\\s+"),sampleName);
}
genotypes.add(new Genotype(sampleName, myAlleles, 20.0));
}
public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) {
@ -515,139 +526,7 @@ class PlinkVariantInfo implements Comparable {
alleleStr[1] = "0";
}
if ( this.isSNP() ) {
this.addSNP(alleleStr,sampleName);
} else {
this.addIndel(alleleStr,sampleName);
}
}
private void addSNP(String[] alleleStrings, String sampleName) {
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
for ( String alStr : alleleStrings ) {
alleles.add(new Allele(alStr.getBytes()));
}
genotypes.add(new Genotype(alleles,sampleName,20.0) );
sampleNames.add(sampleName);
}
private void addIndel(String[] alleleStrings, String sampleName) {
String alleleStr1 = alleleStrings[0];
String alleleStr2 = alleleStrings[1];
if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) {
// heterozygous indel
if ( alleleStr1.contains("-") ) {
this.addHetIndel(alleleStr2,sampleName) ;
} else {
this.addHetIndel(alleleStr1,sampleName);
}
} else {
this.addHomIndel(alleleStr1, alleleStr2, sampleName);
}
}
private void addHetIndel(String baseStr, String sampleName) {
Allele ref;
Allele alt;
if ( indelType == AlleleType.INSERTION ) {
ref = new Allele("-",true);
alt = new Allele(baseStr.getBytes(),false);
} else {
alt = new Allele("-",false);
ref = new Allele(baseStr.getBytes(),true);
}
// this.setIndelLength(alt,baseStr.length());
if ( ! indelHolder.isEmpty() ) {
siteIndelLength = baseStr.length();
this.addHeldIndels();
}
Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0);
// this.setIndelGenotypeLength(indel,siteIndelLength);
this.genotypes.add(indel);
this.sampleNames.add(sampleName);
}
private void addHomIndel(String strand1, String strand2, String sampleName) {
Allele allele1;
Allele allele2;
boolean reference;
if ( indelType == AlleleType.DELETION ) {
if ( strand1.contains("-") ) {
// homozygous deletion
allele1 = new Allele("-",false);
allele2 = new Allele("-",false);
reference = false;
} else { // homozygous reference at a deletion variant site
allele1 = new Allele(strand1.getBytes(),true);
allele2 = new Allele(strand2.getBytes(),true);
reference = true;
}
} else {
if ( strand1.contains("-") ) {
// homozygous reference
allele1 = new Allele("-",true);
allele2 = new Allele("-",true);
reference = true;
} else {
allele1 = new Allele(strand1.getBytes(),false);
allele2 = new Allele(strand2.getBytes(),false);
reference = false;
}
}
if ( reference ) {
if ( ! indelHolder.isEmpty() ) {
siteIndelLength = strand1.length();
this.addHeldIndels();
}
}
if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site
Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
// setIndelGenotypeLength(gen,siteIndelLength);
this.genotypes.add(gen);
this.sampleNames.add(sampleName);
} else { // hold on the variants until we *do* know the in/del length at this site
this.indelHolder.add(allele1);
this.indelHolder.add(allele2);
this.sampleHolder.add(sampleName);
}
}
/* private void setIndelGenotypeLength(Genotype g, int length) {
g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
}
private void addHeldIndels() {
Allele del1;
Allele del2;
int startingSize = indelHolder.size();
for ( int i = 0; i < startingSize ; i+=2 ) {
del1 = indelHolder.get(i);
del2 = indelHolder.get(i+1);
this.addIndelFromCache(del1,del2,sampleHolder.get(i/2));
if ( indelHolder.size() != startingSize ) {
throw new StingException("Halting algorithm -- possible infinite loop");
}
}
indelHolder.clear();
sampleHolder.clear();
}
private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) {
this.setIndelLength(indel1,siteIndelLength);
this.setIndelLength(indel2,siteIndelLength);
Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0);
// this.setIndelGenotypeLength(indel,siteIndelLength);
this.genotypes.add(indel);
this.sampleNames.add(sampleName);
addGenotypeEntry(alleleStr, sampleName);
}
public int compareTo(Object obj) {
@ -657,22 +536,14 @@ class PlinkVariantInfo implements Comparable {
return loc.compareTo(((PlinkVariantInfo) obj).getLocation());
}
private void setIndelLength(Allele al, int length) {
// Todo -- once alleles support deletion lengths add that information
// Todo -- into the object; for now this can just return
return;
}
}
class PlinkBinaryTrifecta {
public PlinkBinaryTrifecta() {
}
public PlinkBinaryTrifecta() {}
public File bedFile;
public File bimFile;
public File famFile;
} */
}

View File

@ -111,6 +111,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
addModule("PicardDbSNP", rodPicardDbSNP.class);
addModule("HapmapVCF", HapmapVCFROD.class);
addModule("Beagle", BeagleROD.class);
addModule("Plink", PlinkRod.class);
}

View File

@ -366,7 +366,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
*/
@Override
public Genotype getCalledGenotype() {
return new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb);
return new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb);
}
/**
@ -377,7 +377,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
@Override
public List<Genotype> getGenotypes() {
List<Genotype> ret = new ArrayList<Genotype>();
ret.add(new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb));
ret.add(new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb));
return ret;
}

View File

@ -275,7 +275,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
*/
@Override
public Genotype getCalledGenotype() {
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
return new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence());
}
/**
@ -286,7 +286,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
@Override
public List<Genotype> getGenotypes() {
List<Genotype> ret = new ArrayList<Genotype>();
ret.add(new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence()));
ret.add(new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence()));
return ret;
}

View File

@ -196,14 +196,7 @@ public class VariantContextAdaptors {
public static VCFRecord toVCF(VariantContext vc) {
// deal with the reference
char referenceBase = 'N'; // by default we'll use N
if ( vc.getReference().length() == 1 ) {
referenceBase = (char)vc.getReference().getBases()[0];
}
if ( ! vc.isSNP() )
// todo -- update the code so it works correctly with indels
throw new StingException("VariantContext -> VCF converter currently doesn't support indels; complain to the GATK team");
String referenceBase = new String(vc.getReference().getBases());
String contig = vc.getLocation().getContig();
long position = vc.getLocation().getStart();

View File

@ -95,7 +95,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
for ( String sample : GLs.keySet() ) {
// create the call
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, Character.toString(ref), loc);
// set the genotype and confidence
Pair<Integer, Double> AFbasedGenotype = matrix.getGenotype(frequency, sample);

View File

@ -347,7 +347,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
private RodVCF fakeVCFForSample(RodVCF eval, ReferenceContext ref, final String sampleName) {
VCFGenotypeRecord genotype = (VCFGenotypeRecord)eval.getGenotype(sampleName);
if ( genotype.getNegLog10PError() > 0 ) {
VCFRecord record = new VCFRecord(ref.getBase(), ref.getLocus(), "GT");
VCFRecord record = new VCFRecord(Character.toString(ref.getBase()), ref.getLocus(), "GT");
record.setAlternateBases(eval.getRecord().getAlternateAlleles());
record.addGenotypeRecord(genotype);
record.setQual(10*genotype.getNegLog10PError());

View File

@ -83,9 +83,9 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
Genotype parent1call = parent1.genotypes.get(0);
Genotype parent2call = parent2.genotypes.get(0);
if (!parent1call.isVariant(parent1call.getReference()) &&
if (!parent1call.isVariant(parent1call.getReference().charAt(0)) &&
parent1call.getNegLog10PError() > 5 &&
!parent2call.isVariant(parent2call.getReference()) &&
!parent2call.isVariant(parent2call.getReference().charAt(0)) &&
parent2call.getNegLog10PError() > 5) {
double sumConfidences = 0.5 * (0.5 * child.getNegLog10PError() +

View File

@ -81,7 +81,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
VariantCallContext calls = UG.map(tracker, ref, subContext);
if (calls != null && calls.genotypes != null && calls.genotypes.size() > 0) {
Genotype call = calls.genotypes.get(0);
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
String callType = (call.isVariant(call.getReference().charAt(0))) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
}
}
@ -113,7 +113,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
double nextVrsBest = 0;
double nextVrsRef = 0;
char ref = locus.getReference();
char ref = locus.getReference().charAt(0);
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();

View File

@ -63,7 +63,7 @@ public class HapMap2VCF extends RodWalker<Integer, Integer> {
VCFGenotypeEncoding refAllele = new VCFGenotypeEncoding(ref_allele.toString());
// Create a new record
VCFRecord record = new VCFRecord(ref_allele, context.getLocation(), "GT");
VCFRecord record = new VCFRecord(Character.toString(ref_allele), context.getLocation(), "GT");
// Record each sample's genotype info
String[] hapmap_rod_genotypes = hapmap_rod.getGenotypes();

View File

@ -2,14 +2,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.walkers.annotator.HardyWeinberg;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.gatk.refdata.PlinkRod;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.BufferedReader;
@ -21,9 +22,7 @@ import java.util.*;
/**
* Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
*/
public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
@Argument(fullName="sequenomePedFile", shortName="sPed", doc="The sequenome file from which to generate a VCF", required=true)
public File seqFile = null;
public class PlinkToVCF extends RodWalker<VCFRecord,Integer> {
@Argument(fullName="outputVCF", shortName="vcf", doc="The VCF file to write to", required=true)
public File vcfFile = null;
@Argument(fullName="numberOfSamples", shortName="ns", doc="The number of samples sequenced", required=false)
@ -44,7 +43,6 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
"FID","IID","PAT","MAT","SEX","PHENOTYPE"));
private final int INIT_NUMBER_OF_POPULATIONS = 10;
private final int DEFAULT_QUALITY = 20;
private HashMap<String, SequenomVariantInfo> sequenomResults = new HashMap<String,SequenomVariantInfo>();
private ArrayList<String> sampleNames = new ArrayList<String>(nSamples);
private VCFGenotypeWriterAdapter vcfWriter;
private final HardyWeinberg HWCalc = new HardyWeinberg();
@ -52,17 +50,15 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
private HashMap<String,String> samplesToPopulation;
public void initialize() {
//System.out.println("Initializing... parse sequenom file");
parseSequenomFile(seqFile);
vcfWriter = new VCFGenotypeWriterAdapter(vcfFile);
if ( useSmartHardy ) {
samplesToPopulation = parsePopulationFile(popFile);
}
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "Sequenom2VCF"));
hInfo.add(new VCFHeaderLine("source", "PlinkToVCF"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
vcfWriter.writeHeader(new TreeSet<String>(sampleNames),hInfo);
vcfWriter.writeHeader(new TreeSet<String>(sampleNames), hInfo);
nSamples = sampleNames.size();
}
@ -72,21 +68,32 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
}
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( sequenomResults.containsKey(context.getLocation().toString()) ) {
SequenomVariantInfo varInfo = sequenomResults.remove(context.getLocation().toString());
return addVariantInformationToCall(ref,varInfo);
} else {
if ( tracker == null )
return null;
// get the Plink rod at this locus if there is one
PlinkRod plinkRod = null;
Iterator<ReferenceOrderedDatum> rods = tracker.getAllRods().iterator();
while (rods.hasNext()) {
ReferenceOrderedDatum rod = rods.next();
if ( rod instanceof PlinkRod ) {
plinkRod = (PlinkRod)rod;
break;
}
}
if ( plinkRod == null )
return null;
return addVariantInformationToCall(ref, plinkRod);
}
public Integer reduce(VCFRecord call, Integer numVariants) {
if ( call == null ) {
return numVariants;
} else {
if ( call != null ) {
numVariants++;
printToVCF(call);
return 1 + numVariants;
}
return numVariants;
}
public void onTraversalDone(Integer finalReduce) {
@ -106,51 +113,25 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
}
}
private VCFRecord addVariantInformationToCall(ReferenceContext ref, SequenomVariantInfo varInfo) {
int numNoCalls = 0;
int numHomNonrefCalls = 0;
int numNonrefAlleles = 0;
private VCFRecord addVariantInformationToCall(ReferenceContext ref, PlinkRod plinkRod) {
int sampleNumber = 0;
VCFRecord record = new VCFRecord(ref.getBase(),ref.getLocus(),"GT");
VariantContext vContext = plinkRod.getVariantContext();
VCFRecord record = VariantContextAdaptors.toVCF(vContext);
record.setGenotypeFormatString("GT");
for ( String genTypeStr : varInfo.getGenotypes() ) {
if ( genTypeStr.indexOf("0") == -1 ) {
VCFGenotypeEncoding allele1 = new VCFGenotypeEncoding(genTypeStr.substring(0,1));
VCFGenotypeEncoding allele2 = new VCFGenotypeEncoding(genTypeStr.substring(1));
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>(2);
alleles.add(allele1);
alleles.add(allele2);
int numNoCalls = vContext.getNoCallCount();
int numHomVarCalls = vContext.getHomVarCount();
int numHetCalls = vContext.getHetCount();
VCFGenotypeRecord genotype = new VCFGenotypeRecord(sampleNames.get(sampleNumber), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
genotype.setField("GQ",String.format("%d",DEFAULT_QUALITY));
if ( genotype.isVariant(ref.getBase()) ) {
if ( genotype.isHom() ) {
numHomNonrefCalls++;
numNonrefAlleles+=2;
record.addAlternateBase(allele1);
} else {
numNonrefAlleles++;
record.addAlternateBase(allele1.getBases().equalsIgnoreCase(String.format("%c",ref.getBase())) ? allele2 : allele1);
}
}
record.addGenotypeRecord(genotype);
} else {
numNoCalls++;
}
sampleNumber++;
}
double noCallProp = ( (double) numNoCalls )/( (double) sampleNames.size());
double homNonRProp = ( (double) numHomNonrefCalls )/( (double) sampleNames.size() - numNoCalls);
double homNonRProp = ( (double) numHomVarCalls )/( (double) sampleNames.size() - numNoCalls);
record.setQual(DEFAULT_QUALITY);
String hw = hardyWeinbergCalculation(ref,record);
double hwScore = hw != null ? Double.valueOf(hw) : -0.0;
record.addInfoFields(generateInfoField(record, numNoCalls,numHomNonrefCalls,numNonrefAlleles,ref, varInfo, hwScore));
// TODO -- record.addInfoFields(generateInfoField(record, numNoCalls,numHomVarCalls,numNonrefAlleles,ref, plinkRod, hwScore));
if ( hwScore > maxHardy ) {
record.setFilterString("Hardy-Weinberg");
} else if ( noCallProp > maxNoCall ) {
@ -174,7 +155,7 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
}
private Map<String,String> generateInfoField(VCFRecord rec, int nocall, int homnonref, int allnonref,
ReferenceContext ref, SequenomVariantInfo info, double hwScore) {
ReferenceContext ref, PlinkRod info, double hwScore) {
double propNoCall = ( ( double ) nocall / (double) nSamples );
double propHomNR = ( (double) homnonref / (double) nSamples );
HashMap<String,String> infoMap = new HashMap<String,String>(1);
@ -206,13 +187,13 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
for ( String name : sampleNames ) {
String pop = samplesToPopulation.get(name);
if ( rec.getGenotype(name) != null ) {
genotypesByPopulation.get(pop).add(rec.getGenotype(name));
// TODO -- genotypesByPopulation.get(pop).add(rec.getGenotype(name));
}
}
for ( String population : samplesToPopulation.values() ) {
VCFVariationCall v = new VCFVariationCall(ref.getBase(),ref.getLocus(),VCFVariationCall.VARIANT_TYPE.SNP);
v.setGenotypeCalls(genotypesByPopulation.get(population));
// TODO -- v.setGenotypeCalls(genotypesByPopulation.get(population));
hardyWeinbergByPopulation.put(population,HWCalc.annotate(null,ref,null,v));
}
@ -229,111 +210,6 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
return String.format("%s",maxH);
}
private void parseSequenomFile(File sequenomFile) {
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(sequenomFile));
} catch ( IOException e ) {
throw new StingException("Sequenom file could not be opened",e);
}
String header;
try {
header = reader.readLine();
//System.out.println("Read header line, it was "+header);
} catch (IOException e) {
throw new StingException(e.getMessage(),e);
}
HashMap<Integer,SequenomVariantInfo> sequenomVariants = parseSequenomHeader(header);
try {
String line;
do {
line = reader.readLine();
//ystem.out.println("Read line, it was"+line);
if ( line != null ) {
//System.out.println("Parsing line...");
parseSequenomLine(sequenomVariants,line);
}
} while ( line != null);
reader.close();
convertToLocusMap(sequenomVariants);
} catch ( IOException e) {
throw new StingException("Error reading sequenom file", e);
}
}
private HashMap<Integer,SequenomVariantInfo> parseSequenomHeader(String header) {
// the header will give us all the probe names and contigs
//System.out.println("Parsing header");
HashMap<Integer,SequenomVariantInfo> variantsByFileOffset = new HashMap<Integer,SequenomVariantInfo>();
String[] fields = header.split("\t");
int fieldOffset = 0;
for ( String entry : fields ) {
if ( ! HEADER_FIELDS.contains(entry) ) {
//System.out.println(entry);
// actually a SNP
String snpName = entry.split("\\|")[1];
//System.out.println("Entry: "+entry+" Name: "+snpName);
variantsByFileOffset.put(fieldOffset,new SequenomVariantInfo(snpName,nSamples));
//System.out.println("Adding entry for offset "+fieldOffset);
}
fieldOffset++;
}
return variantsByFileOffset;
}
private void parseSequenomLine(HashMap<Integer,SequenomVariantInfo> variants, String line) {
if ( line == null ) {
// early return
//System.out.println("Line was null in file");
return;
}
String[] entries = line.split("\t");
int offset = 0;
if ( entries[1].equalsIgnoreCase("empty")) {
return;
}
sampleNames.add(entries[1]);
for ( String entry : entries ) {
if ( variants.containsKey(offset) ) { // actual SNP
variants.get(offset).addGenotype(entry);
//System.out.println("Added: "+entry+"To "+offset);
}
offset++;
}
}
private void convertToLocusMap(HashMap<Integer,SequenomVariantInfo> variants) {
for ( SequenomVariantInfo variant : variants.values() ) {
//System.out.println("Variant name: "+variant.getName());
String loc = genomeLocFromVariantName(variant.getName());
//System.out.println(variant.getName());
if ( loc == null ) {
throw new StingException("Genome locus was null");
}
sequenomResults.put(loc,variant);
}
variants.clear();
variants = null;
}
private String genomeLocFromVariantName(String name) {
String[] nameInfo = name.split("_");
//System.out.println(name);
//System.out.println(nameInfo[0]);
String chr = nameInfo[0].substring(1); // format: c3 becomes 3
String pos = nameInfo[1].substring(1); // format: p9961104 becomes 9961104
if ( useb36contigs ) {
return chr+":"+pos;
} else {
return "chr"+chr+":"+pos;
}
}
private HashMap<String,String> parsePopulationFile(File file) {
HashMap<String,String> samplesToPopulation = new HashMap<String,String>();
try {
@ -352,29 +228,4 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
return samplesToPopulation;
}
}
class SequenomVariantInfo {
private ArrayList<String> genotypes;
private String variantName;
public SequenomVariantInfo(String name, int nPeople) {
genotypes = new ArrayList<String>(nPeople);
variantName = name;
}
public void addGenotype(String genotype) {
String[] alleles = genotype.split(" ");
genotypes.add(alleles[0]+alleles[1]);
}
public String getName() {
return variantName;
}
public ArrayList<String> getGenotypes() {
return genotypes;
}
}

View File

@ -186,7 +186,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
if (dbsnp != null) info.put("DB","1");
if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1");
return new VCFRecord(ref.getBase(),
return new VCFRecord(Character.toString(ref.getBase()),
context.getContig(),
(int) context.getPosition(),
(dbsnp == null) ? "." : dbsnp.getRS_ID(),

View File

@ -90,7 +90,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
}
}
VCFRecord subset = new VCFRecord(record.getReferenceBase(),
VCFRecord subset = new VCFRecord(record.getReference(),
record.getLocation().getContig(),
(int) record.getLocation().getStart(),
record.getID(),
@ -110,7 +110,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
boolean isVariant = false;
for (VCFGenotypeEncoding ge : ((VCFGenotypeRecord)subset.getGenotypes().get(0)).getAlleles()) {
if (record.getReferenceBase() != ge.getBases().charAt(0)) {
if (!record.getReference().equals(ge.getBases())) {
isVariant = true;
}
}

View File

@ -73,7 +73,7 @@ class VCFCallRates extends CommandLineProgram
record = reader.next();
}
char ref = record.getReferenceBase();
char ref = record.getReference().charAt(0);
String[] new_sample_names = record.getSampleNames();
if (new_sample_names.length != sample_names.length) { throw new RuntimeException(); }

View File

@ -91,7 +91,7 @@ class VCFSequenomAnalysis extends CommandLineProgram
continue;
}
char ref = record1.getReferenceBase();
char ref = record1.getReference().charAt(0);
char alt = VCFTool.getAlt(record2);
int n_total_sequenom = VCFTool.Compute_n_total(record1);

View File

@ -95,7 +95,7 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
String[] sample_names = record2.getSampleNames();
char ref = record1.getReferenceBase();
char ref = record1.getReference().charAt(0);
char alt = VCFTool.getAlt(record2);
int n_total_sequenom = VCFTool.Compute_n_total(record1, sample_names);
@ -211,8 +211,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
}
if (n_alt != 1) { throw new RuntimeException(); }
if (singleton_name.equals("")) { throw new RuntimeException(); }
@ -234,8 +234,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
break;
}
}
@ -263,8 +263,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (n_alt == 1) { het_samples.add(sample_names[i]); }
}
@ -291,8 +291,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { dropped_hets += 1; continue; }
if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (n_alt == 1) { matched_hets += 1; }
else { mismatched_hets += 1; }
}

View File

@ -263,7 +263,7 @@ class CheckRefFields extends CommandLineProgram
}
long offset = record.getLocation().getStart();
char vcf_ref_base = record.getReferenceBase();
char vcf_ref_base = record.getReference().charAt(0);
char fasta_ref_base = (char)ref_seq[(int)offset-1];
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
@ -338,7 +338,7 @@ class FixRefFields extends CommandLineProgram
}
long offset = record.getLocation().getStart();
char vcf_ref_base = record.getReferenceBase();
char vcf_ref_base = record.getReference().charAt(0);
char fasta_ref_base = (char)ref_seq[(int)offset-1];
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
@ -524,7 +524,7 @@ class PrintGQ extends CommandLineProgram
record = reader.next();
}
char ref = record.getReferenceBase();
char ref = record.getReference().charAt(0);
String[] sample_names = record.getSampleNames();
@ -617,7 +617,7 @@ class VCFSimpleStats extends CommandLineProgram
record1 = reader1.next();
}
char ref = record1.getReferenceBase();
char ref = record1.getReference().charAt(0);
String[] sample_names1 = record1.getSampleNames();
@ -845,7 +845,7 @@ class VCFConcordance extends CommandLineProgram
}
char ref = record1.getReferenceBase();
char ref = record1.getReference().charAt(0);
String[] sample_names1 = record1.getSampleNames();
String[] sample_names2 = record2.getSampleNames();
@ -1260,7 +1260,7 @@ public class VCFTool
public static boolean isTransition(VCFRecord record)
{
char ref = record.getReferenceBase();
char ref = record.getReference().charAt(0);
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
char alt = alleles.get(0).getBases().charAt(0);
@ -1315,8 +1315,8 @@ public class VCFTool
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
}
return n_alt + n_ref;
}
@ -1359,8 +1359,8 @@ public class VCFTool
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
}
return n_alt;
}
@ -1406,8 +1406,8 @@ public class VCFTool
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (n_alt == 1) { n_het += 1; }
}
return n_het;
@ -1476,8 +1476,8 @@ public class VCFTool
Arrays.sort(c);
g = new String(c);
if (g.equals("..")) { continue; }
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
if (n_ref == 2) { ref += 1; }
else if (n_ref == 1 && n_alt == 1) { het += 1; }

View File

@ -15,8 +15,8 @@ public abstract class AlleleConstrainedGenotype implements Genotype {
private char ref = NO_CONSTRAINT;
private char alt = NO_CONSTRAINT;
public AlleleConstrainedGenotype(char ref) {
this.ref = ref;
public AlleleConstrainedGenotype(String ref) {
this.ref = ref.charAt(0);
}
/**

View File

@ -24,8 +24,8 @@ public class BasicGenotype implements Genotype {
// our location
private GenomeLoc mLocation;
// the reference base.
private char mRef;
// the reference bases
private String mRef;
// the confidence score
private double mNegLog10PError;
@ -38,7 +38,7 @@ public class BasicGenotype implements Genotype {
* @param ref the reference base as a char
* @param negLog10PError the confidence score
*/
public BasicGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError) {
public BasicGenotype(GenomeLoc location, String genotype, String ref, double negLog10PError) {
mNegLog10PError = negLog10PError;
for ( char base : genotype.toCharArray() ) {
@ -56,7 +56,6 @@ public class BasicGenotype implements Genotype {
*
* @return the negitive log based error estimate
*/
@Override
public double getNegLog10PError() {
return mNegLog10PError;
}
@ -66,7 +65,6 @@ public class BasicGenotype implements Genotype {
*
* @return the bases, as a string
*/
@Override
public String getBases() {
return mGenotype;
}
@ -76,7 +74,6 @@ public class BasicGenotype implements Genotype {
*
* @return the ploidy value
*/
@Override
public int getPloidy() {
return mGenotype.length();
}
@ -86,7 +83,6 @@ public class BasicGenotype implements Genotype {
*
* @return true if we're homozygous, false otherwise
*/
@Override
public boolean isHom() {
if (mGenotype.length() < 1)
return false;
@ -106,7 +102,6 @@ public class BasicGenotype implements Genotype {
*
* @return true if we're het, false otherwise
*/
@Override
public boolean isHet() {
if (mGenotype.length() < 1)
return false;
@ -118,7 +113,6 @@ public class BasicGenotype implements Genotype {
*
* @return a GenomeLoc representing the location
*/
@Override
public GenomeLoc getLocation() {
return mLocation;
}
@ -128,7 +122,6 @@ public class BasicGenotype implements Genotype {
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
@ -140,7 +133,6 @@ public class BasicGenotype implements Genotype {
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return !(mGenotype.charAt(0) == ref && isHom());
}
@ -150,8 +142,7 @@ public class BasicGenotype implements Genotype {
*
* @return a character, representing the reference base
*/
@Override
public char getReference() {
public String getReference() {
return mRef;
}
@ -160,7 +151,6 @@ public class BasicGenotype implements Genotype {
*
* @return the variant
*/
@Override
public Variation toVariation(char ref) {
if (!isVariant(ref)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(), String.valueOf(ref), this.getBases().length(), mLocation, mNegLog10PError);

View File

@ -79,7 +79,7 @@ public interface Genotype {
* get the reference base.
* @return a character, representing the reference base
*/
public char getReference();
public String getReference();
/**
* return this genotype as a variant

View File

@ -85,7 +85,7 @@ public class GenotypeWriterFactory {
* @param loc the location
* @return an unpopulated genotype call object
*/
public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, String ref, GenomeLoc loc) {
switch (format) {
case VCF:
return new VCFGenotypeCall(ref, loc);

View File

@ -66,7 +66,6 @@ public class GeliAdapter implements GeliGenotypeWriter {
*
* @param fileHeader the file header to write out
*/
@Override
public void writeHeader(final SAMFileHeader fileHeader) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
}
@ -131,7 +130,7 @@ public class GeliAdapter implements GeliGenotypeWriter {
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
char ref = gCall.getReference();
char ref = gCall.getReference().charAt(0);
int readCount = gCall.getReadCount();
double maxMappingQual = 0;
if ( gCall.getPileup() != null ) {

View File

@ -34,9 +34,9 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
* @param ref the ref character
* @param loc the genome loc
*/
public GeliGenotypeCall(char ref, GenomeLoc loc) {
public GeliGenotypeCall(String ref, GenomeLoc loc) {
super(ref);
mRefBase = ref;
mRefBase = ref.charAt(0);
mLocation = loc;
// fill in empty data
@ -44,12 +44,12 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
Arrays.fill(mPosteriors, Double.MIN_VALUE);
}
public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) {
public GeliGenotypeCall(String ref, GenomeLoc loc, String genotype, double negLog10PError) {
super(ref);
mRefBase = ref;
mRefBase = ref.charAt(0);
mLocation = loc;
mBestGenotype = DiploidGenotype.valueOf(genotype);
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
mRefGenotype = DiploidGenotype.createHomGenotype(mRefBase);
mNextGenotype = mRefGenotype;
// set general posteriors to min double value
@ -114,7 +114,7 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
private void lazyEval() {
if (mBestGenotype == null) {
char ref = this.getReference();
char ref = this.getReference().charAt(0);
char alt = this.getAlternateAllele();
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
@ -285,8 +285,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
*
* @return the reference character
*/
public char getReference() {
return mRefBase;
public String getReference() {
return Character.toString(mRefBase);
}
/**

View File

@ -65,7 +65,7 @@ public class GeliTextWriter implements GeliGenotypeWriter {
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
char ref = gCall.getReference();
char ref = gCall.getReference().charAt(0);
double[] posteriors = gCall.getPosteriors();
double[] lks;

View File

@ -33,9 +33,9 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
* @param ref the ref character
* @param loc the genome loc
*/
public GLFGenotypeCall(char ref, GenomeLoc loc) {
mRefBase = ref;
mGenotype = Utils.dupString(ref, 2);
public GLFGenotypeCall(String ref, GenomeLoc loc) {
mRefBase = ref.charAt(0);
mGenotype = Utils.dupString(mRefBase, 2);
// fill in empty data
mLocation = loc;
@ -192,8 +192,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
*
* @return the reference character
*/
public char getReference() {
return mRefBase;
public String getReference() {
return Character.toString(mRefBase);
}
/**

View File

@ -149,7 +149,7 @@ public class GLFWriter implements GLFGenotypeWriter {
throw new IllegalArgumentException("Only GLFGenotypeCall should be passed in to the GLF writers");
GLFGenotypeCall gCall = (GLFGenotypeCall) call;
char ref = gCall.getReference();
char ref = gCall.getReference().charAt(0);
// get likelihood information if available
LikelihoodObject obj = new LikelihoodObject(gCall.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);

View File

@ -43,7 +43,6 @@ public class TabularLFWriter implements GenotypeWriter {
*
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(Genotype locus) {
double likelihoods[];
int readDepth = -1;
@ -56,7 +55,7 @@ public class TabularLFWriter implements GenotypeWriter {
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
}
char ref = locus.getReference();
char ref = locus.getReference().charAt(0);
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
@ -84,13 +83,11 @@ public class TabularLFWriter implements GenotypeWriter {
*
* @param position
*/
@Override
public void addNoCall(int position) {
throw new StingException("TabularLFWriter doesn't support no-calls");
}
/** finish writing, closing any open files. */
@Override
public void close() {
if (this.outStream != null) {
outStream.close();
@ -103,13 +100,11 @@ public class TabularLFWriter implements GenotypeWriter {
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata) {
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMultiSample() {
return false;
}

View File

@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.*;
* The implementation of the genotype interface, specific to VCF
*/
public class VCFGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, SampleBacked, Cloneable {
private final char mRefBase;
private final String mRef;
private final GenomeLoc mLocation;
private ReadBackedPileup mPileup = null;
@ -28,19 +28,19 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
// the sample name, used to propulate the SampleBacked interface
private String mSampleName;
public VCFGenotypeCall(char ref, GenomeLoc loc) {
public VCFGenotypeCall(String ref, GenomeLoc loc) {
super(ref);
mRefBase = ref;
mRef = ref;
mLocation = loc;
// fill in empty data
mGenotype = DiploidGenotype.createHomGenotype(ref);
mGenotype = DiploidGenotype.createHomGenotype(ref.charAt(0));
mSampleName = "";
}
public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
public VCFGenotypeCall(String ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
super(ref);
mRefBase = ref;
mRef = ref;
mLocation = loc;
mGenotype = genotype;
mNegLog10PError = negLog10PError;
@ -80,12 +80,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
return mGenotype.equals(otherCall.mGenotype) &&
mNegLog10PError == otherCall.mNegLog10PError &&
mLocation.equals(otherCall.mLocation) &&
mRefBase == otherCall.mRefBase;
mRef.equals(otherCall.mRef);
}
public String toString() {
return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f",
getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError());
getLocation(), mGenotype, mRef, getReadCount(), getNegLog10PError());
}
/**
@ -168,7 +168,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
* @return true if we're a variant
*/
public boolean isVariant() {
return isVariant(mRefBase);
return isVariant(mRef.charAt(0));
}
/**
@ -209,12 +209,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
}
/**
* get the reference character
* get the reference string
*
* @return the reference character
* @return the reference string
*/
public char getReference() {
return mRefBase;
public String getReference() {
return mRef;
}
/**

View File

@ -139,8 +139,8 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
return mRecord != null ? mRecord.getLocation() : null;
}
public char getReference() {
return mRecord != null ? mRecord.getReferenceBase() : 'N';
public String getReference() {
return mRecord != null ? mRecord.getReference() : "N";
}
public Variation toVariation(char ref) {

View File

@ -102,7 +102,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
if ( locusdata == null )
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have variation data");
params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0));
params.setLocations(locusdata.getLocation(), locusdata.getReference());
// if there is no genotype data, we'll also need to set an alternate allele
if ( locusdata.isBiallelic() && locusdata.isSNP() )
@ -161,7 +161,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
if ( locusdata != null )
dbSnpID = ((VCFVariationCall)locusdata).getID();
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBases(),
params.getContig(),
params.getPosition(),
(dbSnpID == null ? VCFRecord.EMPTY_ID_FIELD : dbSnpID),

View File

@ -12,7 +12,7 @@ import java.util.ArrayList;
* we feed to the VCF (like ensuring the same position for each genotype in a call).
*/
class VCFParameters {
private char referenceBase = '0';
private String referenceBases = "0";
private int position = 0;
private String contig = null;
private boolean initialized = false;
@ -21,7 +21,7 @@ class VCFParameters {
private List<VCFGenotypeEncoding> alternateBases = new ArrayList<VCFGenotypeEncoding>();
private List<Integer> alleleCounts = new ArrayList<Integer>();
public void setLocations(GenomeLoc location, char refBase) {
public void setLocations(GenomeLoc location, String refBases) {
// if we haven't set it up, we initialize the object
if (!initialized) {
initialized = true;
@ -30,14 +30,14 @@ class VCFParameters {
if (location.getStart() != location.getStop()) {
throw new IllegalArgumentException("The start and stop locations must be the same");
}
this.referenceBase = refBase;
this.referenceBases = refBases;
} else {
if (!contig.equals(this.contig))
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
if (location.getStart() != this.position)
throw new IllegalArgumentException("The position has to be the same at a single locus");
if (refBase != this.referenceBase)
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
if (refBases != this.referenceBases)
throw new IllegalArgumentException("The reference has to be the same at a single locus");
}
}
@ -52,8 +52,8 @@ class VCFParameters {
}
/** @return get the reference base */
public char getReferenceBase() {
return referenceBase;
public String getReferenceBases() {
return referenceBases;
}
public void addGenotypeRecord(VCFGenotypeRecord record) {
@ -72,7 +72,7 @@ class VCFParameters {
public void addAlternateBase(VCFGenotypeEncoding base) {
if ( !alternateBases.contains(base) &&
!base.toString().equals(String.valueOf(getReferenceBase()).toUpperCase()) &&
!base.toString().equals(String.valueOf(getReferenceBases()).toUpperCase()) &&
!base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) {
alternateBases.add(base);
alleleCounts.add(0);

View File

@ -41,7 +41,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
// the reference base
private char mReferenceBase;
private String mReferenceBases;
// our location
private GenomeLoc mLoc;
// our id
@ -55,7 +55,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
// our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
private final Map<String, String> mInfoFields = new TreeMap<String, String>();
private final String mGenotypeFormatString;
private String mGenotypeFormatString;
// our genotype sample fields
private final List<Genotype> mGenotypeRecords = new ArrayList<Genotype>();
@ -63,12 +63,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
/**
* given a reference base, a location, and the format string, create a VCF record.
*
* @param referenceBase the reference base to use
* @param referenceBases the reference bases to use
* @param location our genomic location
* @param genotypeFormatString the format string
*/
public VCFRecord(char referenceBase, GenomeLoc location, String genotypeFormatString) {
setReferenceBase(referenceBase);
public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) {
setReferenceBase(referenceBases);
setLocation(location);
mGenotypeFormatString = genotypeFormatString;
}
@ -99,7 +99,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
/**
* create a VCF record
*
* @param referenceBase the reference base to use
* @param referenceBases the reference bases to use
* @param contig the contig this variant is on
* @param position our position
* @param ID our ID string
@ -110,7 +110,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
* @param genotypeFormatString the format string
* @param genotypeObjects the genotype objects
*/
public VCFRecord(char referenceBase,
public VCFRecord(String referenceBases,
String contig,
long position,
String ID,
@ -120,7 +120,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
Map<String, String> infoFields,
String genotypeFormatString,
List<VCFGenotypeRecord> genotypeObjects) {
setReferenceBase(referenceBase);
setReferenceBase(referenceBases);
setLocation(contig, position);
this.mID = ID;
for (VCFGenotypeEncoding alt : altBases)
@ -155,7 +155,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
case REF:
if (columnValues.get(val).length() != 1)
throw new IllegalArgumentException("Reference base should be a single character");
setReferenceBase(columnValues.get(val).charAt(0));
setReferenceBase(columnValues.get(val));
break;
case ALT:
String values[] = columnValues.get(val).split(",");
@ -215,16 +215,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
* @return either A, T, C, G, or N
*/
public String getReference() {
return Character.toString(mReferenceBase);
}
/**
* get the reference base
*
* @return either A, T, C, G, or N
*/
public char getReferenceBase() {
return mReferenceBase;
return mReferenceBases;
}
/**
@ -333,7 +324,6 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
}
public boolean isInHapmap() {
boolean inHapmap;
if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) {
return true;
} else {
@ -350,7 +340,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
public char getReferenceForSNP() {
if ( !isSNP() )
throw new IllegalStateException("This record does not represent a SNP");
return getReferenceBase();
return getReference().charAt(0);
}
/**
@ -460,11 +450,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
return mGenotypeFormatString;
}// the formatting string for our genotype records
public void setReferenceBase(char referenceBase) {
referenceBase = Character.toUpperCase(referenceBase);
if (referenceBase != 'A' && referenceBase != 'C' && referenceBase != 'T' && referenceBase != 'G' && referenceBase != 'N')
throw new IllegalArgumentException("Reference base must be one of A,C,G,T,N, we saw: " + referenceBase);
mReferenceBase = referenceBase;
public void setGenotypeFormatString(String newFormatString) {
mGenotypeFormatString = newFormatString;
}
public void setReferenceBase(String reference) {
mReferenceBases = reference.toUpperCase();
}
public void setLocation(String chrom, long position) {
@ -578,7 +569,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
builder.append(FIELD_SEPERATOR);
builder.append(getID());
builder.append(FIELD_SEPERATOR);
builder.append(getReferenceBase());
builder.append(getReference());
builder.append(FIELD_SEPERATOR);
List<VCFGenotypeEncoding> alts = getAlternateAlleles();
if ( alts.size() > 0 ) {
@ -667,7 +658,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
*/
public boolean equals(VCFRecord other) {
if (!this.mAlts.equals(other.mAlts)) return false;
if (this.mReferenceBase != other.mReferenceBase) return false;
if (!this.mReferenceBases.equals(other.mReferenceBases)) return false;
if (!this.mLoc.equals(other.mLoc)) return false;
if (!this.mID.equals(other.mID)) return false;
if (this.mQual != other.mQual) return false;

View File

@ -124,7 +124,7 @@ public class VCFUtils {
if ( freqsSeen > 0 )
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
return new VCFRecord(params.getReferenceBase(),
return new VCFRecord(params.getReferenceBases(),
params.getContig(),
params.getPosition(),
(id != null ? id : "."),

View File

@ -14,7 +14,7 @@ import java.io.File;
* @version 0.1
*/
public class AlignerIntegrationTest extends WalkerTest {
@Test
//@Test
public void testBasicAlignment() {
String md5 = "c6d95d8ae707e78fefdaa7375f130995";
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(

View File

@ -26,13 +26,13 @@ public class ConcordanceTruthTableTest extends BaseTest {
ConcordanceTruthTable ctt = new ConcordanceTruthTable(3);
// this will test the count of non-reference alleles at a T/G polymorphic site
Genotype ref1 = new BasicGenotype(null,"GG",'G',30);
Genotype ref2 = new BasicGenotype(null,"GG",'G',30);
Genotype ref3 = new BasicGenotype(null,"GG",'G',30);
Genotype het1 = new BasicGenotype(null,"GT",'G',32);
Genotype het2 = new BasicGenotype(null,"GT",'G',28);
Genotype hom1 = new BasicGenotype(null,"TT",'G',40);
Genotype hom2 = new BasicGenotype(null,"TT",'G',27);
Genotype ref1 = new BasicGenotype(null,"GG","G",30);
Genotype ref2 = new BasicGenotype(null,"GG","G",30);
Genotype ref3 = new BasicGenotype(null,"GG","G",30);
Genotype het1 = new BasicGenotype(null,"GT","G",32);
Genotype het2 = new BasicGenotype(null,"GT","G",28);
Genotype hom1 = new BasicGenotype(null,"TT","G",40);
Genotype hom2 = new BasicGenotype(null,"TT","G",27);
List<Pair<Genotype,Genotype>> oneHom = new ArrayList<Pair<Genotype,Genotype>>(4);
oneHom.add(new Pair<Genotype,Genotype>(ref1,null));

View File

@ -15,12 +15,12 @@ import java.util.List;
* To change this template use File | Settings | File Templates.
*/
public class SequenomToVCFIntegrationTest extends WalkerTest {
@Test
//@Test
public void testSequenomToVCFWithoutSmartHardy() {
String testMD5 = "a16ce402ce4492c8c0552073c0a8bdf5";
String testPedFile = "/humgen/gsa-hpprojects/GATK/data/Validation_Data/Sequenom_Test_File.txt";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -L 1:1000000-2000000 "+
"-T PlinkToVCF -b36contig -ns 10 -sPed "+testPedFile+" -vcf %s";
"-T PlinkToVCF -b36contig -ns 10 -B input,Plink,"+testPedFile+" -vcf %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs,1, Arrays.asList(testMD5));
List<File> result = executeTest("TestSequenomToVCFNoSmartHardy",spec).getFirst();
}

View File

@ -36,27 +36,27 @@ public class BasicGenotypeTest extends BaseTest {
@Test
public void testBasicGenotypeIsHom() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
Assert.assertTrue(gt.isHom());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
Assert.assertTrue(!gt2.isHom());
}
@Test
public void testBasicGenotypeIsHet() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
Assert.assertTrue(!gt.isHet());
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
Assert.assertTrue(gt2.isHet());
}
@Test
public void testBasicGenotypeIsVariant() {
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
Assert.assertTrue(!gt.isVariant('A'));
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
Assert.assertTrue(gt2.isVariant('A'));
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT","A",0);
Assert.assertTrue(gt3.isVariant('A'));
}
}

View File

@ -193,7 +193,7 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
* @param negLog10PError the confidence score
*/
public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) {
super(ref, location);
super(Character.toString(ref), location);
setLikelihoods(likelihoods);
setGenotype(genotype);
setNegLog10PError(negLog10PError);

View File

@ -290,8 +290,8 @@ public class VCFReaderTest extends BaseTest {
grecords = rec.getVCFGenotypeRecords();
for ( VCFGenotypeRecord grec : grecords ) {
if ( !grec.isEmptyGenotype() ) {
Assert.assertTrue(grec.isVariant(rec.getReferenceBase()));
Assert.assertEquals(rec, grec.toVariation(rec.getReferenceBase()));
Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0)));
Assert.assertEquals(rec, grec.toVariation(rec.getReference().charAt(0)));
}
}
@ -300,7 +300,7 @@ public class VCFReaderTest extends BaseTest {
rec = reader.next();
grecords = rec.getVCFGenotypeRecords();
for ( VCFGenotypeRecord grec : grecords ) {
if ( !grec.isVariant(rec.getReferenceBase()) ) {
if ( !grec.isVariant(rec.getReference().charAt(0)) ) {
Assert.assertTrue(grec.isHom());
Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
Assert.assertEquals(-1, grec.getReadCount());

View File

@ -44,7 +44,7 @@ public class VCFRecordTest extends BaseTest {
altBases.add(new VCFGenotypeEncoding("D1"));
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
genotypeObjects.add(createGenotype("sample1", "A", "A"));
return new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
}
/**
@ -72,7 +72,7 @@ public class VCFRecordTest extends BaseTest {
altBases.add(new VCFGenotypeEncoding("D1"));
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
genotypeObjects.add(createGenotype("sample1", "A", "A"));
VCFRecord rec = new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
Assert.assertEquals(2, rec.getAlternateAlleles().size());
}

View File

@ -92,7 +92,7 @@ public class VCFWriterTest extends BaseTest {
rec.setField("bb", "0");
gt.add(rec);
}
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
return new VCFRecord("A","chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
}