1. To make indel calls, we need to get rid of the SNP-centricity of our code. First step is to have the reference be a String, not a char in the Genotype. Note that this is just a temporary patch until the genotype code is ported over to use VariantContext.
2. Significant refactoring of Plink code to work in the rods and use VariantContext. More coming. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2913 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6ceae22793
commit
5f3c80d9aa
|
|
@ -1,105 +1,124 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
/*
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Jan 19, 2010
|
||||
* Time: 10:24:18 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
* User: chartl, ebanks
|
||||
*
|
||||
public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
|
||||
*/
|
||||
public class PlinkRod extends BasicReferenceOrderedDatum implements Iterator<PlinkRod> {
|
||||
|
||||
public static final String SEQUENOM_NO_CALL = "0";
|
||||
public static final String SEQUENOM_NO_VARIANT = "-";
|
||||
|
||||
private final Set<String> headerEntries = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex",
|
||||
"Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE"));
|
||||
private final byte SNP_MAJOR_MODE = 1;
|
||||
|
||||
private ArrayList<PlinkVariantInfo> variants;
|
||||
private PlinkVariantInfo currentVariant;
|
||||
private ListIterator<PlinkVariantInfo> variantIterator;
|
||||
|
||||
private PlinkFileType plinkFileType;
|
||||
|
||||
public enum PlinkFileType {
|
||||
STANDARD_PED,RAW_PED,BINARY_PED
|
||||
STANDARD_PED, RAW_PED, BINARY_PED
|
||||
}
|
||||
|
||||
// CONSTRUCTOR
|
||||
|
||||
public PlinkRod(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public PlinkRod(String name, PlinkVariantInfo record, ListIterator<PlinkVariantInfo> iter) {
|
||||
super(name);
|
||||
currentVariant = record;
|
||||
variantIterator = iter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object initialize(final File plinkFile) throws FileNotFoundException {
|
||||
if ( ! plinkFile.exists() ) {
|
||||
throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist.");
|
||||
}
|
||||
|
||||
variants = parsePlinkFile(plinkFile);
|
||||
if ( variants != null ) {
|
||||
variantIterator = variants.listIterator();
|
||||
currentVariant = variantIterator.next();
|
||||
}
|
||||
|
||||
assertNotNull();
|
||||
ArrayList<PlinkVariantInfo> variants = parsePlinkFile(plinkFile);
|
||||
if ( variants == null )
|
||||
throw new IllegalStateException("Error parsing Plink file");
|
||||
|
||||
variantIterator = variants.listIterator();
|
||||
return null;
|
||||
}
|
||||
|
||||
public static PlinkRod createIterator(String name, File file) {
|
||||
PlinkRod plink = new PlinkRod(name);
|
||||
try {
|
||||
plink.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
}
|
||||
return plink;
|
||||
}
|
||||
|
||||
private void assertNotNull() {
|
||||
if ( currentVariant == null ) {
|
||||
throw new UnsupportedOperationException ( "Current sequenom variant information was set to null" );
|
||||
throw new IllegalStateException("Current variant information is null");
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return variantIterator.hasNext();
|
||||
}
|
||||
|
||||
public PlinkRod next() {
|
||||
if ( !this.hasNext() )
|
||||
throw new NoSuchElementException("PlinkRod next called on iterator with no more elements");
|
||||
|
||||
// get the next record
|
||||
currentVariant = variantIterator.next();
|
||||
return new PlinkRod(name, currentVariant, variantIterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean parseLine(Object obj, String[] args) {
|
||||
if ( variantIterator.hasNext() ) {
|
||||
currentVariant = variantIterator.next();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
throw new UnsupportedOperationException("PlinkRod does not support the parseLine method");
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("The remove operation is not supported for a PlinkRod");
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
assertNotNull();
|
||||
return currentVariant.getLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return currentVariant == null ? "" : currentVariant.toString();
|
||||
assertNotNull();
|
||||
return currentVariant.toString();
|
||||
}
|
||||
|
||||
public String getVariantName() {
|
||||
assertNotNull();
|
||||
return currentVariant.getName();
|
||||
|
||||
}
|
||||
|
||||
public ArrayList<String> getVariantSampleNames() {
|
||||
return currentVariant.getSampleNames();
|
||||
public VariantContext getVariantContext() {
|
||||
assertNotNull();
|
||||
return currentVariant.getVariantContext();
|
||||
}
|
||||
|
||||
public ArrayList<Genotype> getGenotypes() {
|
||||
return currentVariant.getGenotypes();
|
||||
}
|
||||
|
||||
public boolean variantIsSNP() {
|
||||
return currentVariant.isSNP();
|
||||
public boolean isIndel() {
|
||||
assertNotNull();
|
||||
return currentVariant.isIndel();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -123,7 +142,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
/* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***
|
||||
* * PARSING STANDARD TEXT PED FILES * **
|
||||
* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/
|
||||
/*
|
||||
|
||||
private ArrayList<PlinkVariantInfo> parseTextFormattedPlinkFile( File file ) {
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader( new FileReader ( file ) );
|
||||
|
|
@ -131,7 +150,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
ArrayList<PlinkVariantInfo> seqVars = instantiateVariantListFromHeader(header);
|
||||
ArrayList<Integer> snpOffsets = getSNPOffsetsFromHeader(header);
|
||||
|
||||
String line = null;
|
||||
String line;
|
||||
do {
|
||||
line = reader.readLine();
|
||||
incorporateInfo(seqVars,snpOffsets,line);
|
||||
|
|
@ -156,40 +175,29 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
}
|
||||
|
||||
String[] plinkInfo;
|
||||
if ( plinkFileType == PlinkFileType.STANDARD_PED) {
|
||||
plinkInfo = plinkLine.split("\t");
|
||||
} else {
|
||||
if ( plinkFileType != PlinkFileType.STANDARD_PED )
|
||||
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
|
||||
}
|
||||
|
||||
plinkInfo = plinkLine.split("\t");
|
||||
String individualName = plinkInfo[1];
|
||||
|
||||
int snpNumber = 0;
|
||||
|
||||
if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
|
||||
for ( int i : offsets ) {
|
||||
vars.get(snpNumber).addGenotypeEntry(plinkInfo[i], individualName);
|
||||
snpNumber++;
|
||||
}
|
||||
for ( int i : offsets ) {
|
||||
vars.get(snpNumber).addGenotypeEntry(plinkInfo[i].split("\\s+"), individualName);
|
||||
snpNumber++;
|
||||
}
|
||||
}
|
||||
|
||||
private ArrayList<PlinkVariantInfo> instantiateVariantListFromHeader(String header) {
|
||||
if ( header.startsWith("#") ) {// first line is a comment; what we're used to seeing
|
||||
plinkFileType = PlinkFileType.STANDARD_PED;
|
||||
} else {// first line is the raw header; comes from de-binary-ing a .bed file
|
||||
plinkFileType = PlinkFileType.RAW_PED;
|
||||
// if the first line is not a comment (what we're used to seeing),
|
||||
// then it's the raw header (comes from de-binary-ing a .bed file)
|
||||
if ( !header.startsWith("#") )
|
||||
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
|
||||
}
|
||||
|
||||
plinkFileType = PlinkFileType.STANDARD_PED;
|
||||
|
||||
ArrayList<PlinkVariantInfo> seqVars = new ArrayList<PlinkVariantInfo>();
|
||||
String[] headerFields;
|
||||
|
||||
if ( plinkFileType == PlinkFileType.STANDARD_PED ) {
|
||||
headerFields = header.split("\t");
|
||||
} else {
|
||||
throw new StingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
|
||||
}
|
||||
String[] headerFields = header.split("\t");
|
||||
|
||||
for ( String field : headerFields ) {
|
||||
if ( ! headerEntries.contains(field) ) {
|
||||
|
|
@ -225,7 +233,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
/* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
|
||||
* * PARSING BINARY PLINK BED/BIM/FAM FILES * *
|
||||
* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/
|
||||
/*
|
||||
|
||||
private ArrayList<PlinkVariantInfo> parseBinaryFormattedPlinkFile(File file) {
|
||||
PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file);
|
||||
ArrayList<PlinkVariantInfo> parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile);
|
||||
|
|
@ -269,17 +277,16 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
ArrayList<PlinkVariantInfo> variants = new ArrayList<PlinkVariantInfo>();
|
||||
|
||||
try {
|
||||
String line = null;
|
||||
do {
|
||||
String line = reader.readLine();
|
||||
while ( line != null ) {
|
||||
String[] snpInfo = line.split("\\s+");
|
||||
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1]);
|
||||
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
|
||||
variant.setAlleles(snpInfo[4],snpInfo[5]);
|
||||
variants.add(variant);
|
||||
|
||||
line = reader.readLine();
|
||||
if ( line != null ) {
|
||||
String[] snpInfo = line.split("\\s+");
|
||||
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1],true);
|
||||
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
|
||||
variant.setAlleles(snpInfo[4],snpInfo[5]);
|
||||
variants.add(variant);
|
||||
}
|
||||
} while ( line != null );
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new StingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e);
|
||||
}
|
||||
|
|
@ -300,7 +307,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
|
||||
try {
|
||||
String line = null;
|
||||
String line;
|
||||
do {
|
||||
line = reader.readLine();
|
||||
if ( line != null ) {
|
||||
|
|
@ -325,7 +332,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
}
|
||||
|
||||
try {
|
||||
byte genotype = -1;
|
||||
byte genotype;
|
||||
long bytesRead = 0;
|
||||
int snpOffset = 0;
|
||||
int sampleOffset = 0;
|
||||
|
|
@ -397,25 +404,26 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements ReferenceOrd
|
|||
|
||||
class PlinkVariantInfo implements Comparable {
|
||||
|
||||
private enum AlleleType {
|
||||
INSERTION,DELETION
|
||||
}
|
||||
|
||||
private String variantName;
|
||||
private GenomeLoc loc;
|
||||
private ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
private ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
private HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
private HashSet<Genotype> genotypes = new HashSet<Genotype>();
|
||||
|
||||
private ArrayList<Allele> indelHolder;
|
||||
private ArrayList<String> sampleHolder;
|
||||
private int siteIndelLength;
|
||||
private AlleleType indelType;
|
||||
// for indels
|
||||
private boolean isIndel = false;
|
||||
private boolean isInsertion = false;
|
||||
private int indelLength = 0;
|
||||
|
||||
// for binary parsing
|
||||
|
||||
private String locAllele1;
|
||||
private String locAllele2;
|
||||
|
||||
|
||||
public PlinkVariantInfo(String variantName) {
|
||||
this.variantName = variantName;
|
||||
parseName();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
|
@ -424,79 +432,82 @@ class PlinkVariantInfo implements Comparable {
|
|||
return variantName;
|
||||
}
|
||||
|
||||
public ArrayList<String> getSampleNames() {
|
||||
return sampleNames;
|
||||
public VariantContext getVariantContext() {
|
||||
try {
|
||||
return new VariantContext(variantName, loc, alleles, genotypes);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<Genotype> getGenotypes() {
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return this.indelType == null;
|
||||
public boolean isIndel() {
|
||||
return isIndel;
|
||||
}
|
||||
|
||||
public void setGenomeLoc(GenomeLoc loc) {
|
||||
this.loc = loc;
|
||||
}
|
||||
|
||||
private void parseName() {
|
||||
int chromIdx = variantName.indexOf("|c");
|
||||
if ( chromIdx == -1 )
|
||||
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c...)");
|
||||
String[] pieces = variantName.substring(chromIdx+2).split("_");
|
||||
if ( pieces.length < 2 )
|
||||
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
|
||||
|
||||
String chrom = pieces[0];
|
||||
if ( pieces[1].charAt(0) != 'p' )
|
||||
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
|
||||
|
||||
String pos = pieces[1].substring(1);
|
||||
loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
|
||||
|
||||
if ( pieces.length > 2 && (pieces[2].equals("gI") || pieces[2].equals("gD")) ) {
|
||||
// it's an indel
|
||||
isIndel = true;
|
||||
isInsertion = pieces[2].equals("gI");
|
||||
}
|
||||
}
|
||||
|
||||
public void setAlleles(String al1, String al2) {
|
||||
if ( al1.equals("0") ) {
|
||||
if ( al1.equals(PlinkRod.SEQUENOM_NO_CALL) ) {
|
||||
// encoding for a site at which no variants were detected
|
||||
locAllele1 = al2;
|
||||
} else {
|
||||
locAllele1 = al1;
|
||||
}
|
||||
locAllele2 = al2;
|
||||
if ( ! isSNP() ) {
|
||||
siteIndelLength = Math.max(locAllele1.length(),locAllele2.length());
|
||||
}
|
||||
|
||||
public void addGenotypeEntry(String[] alleleStrings, String sampleName) {
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>(2);
|
||||
|
||||
for ( String alleleString : alleleStrings ) {
|
||||
if ( alleleString.equals(PlinkRod.SEQUENOM_NO_CALL) ) {
|
||||
myAlleles.add(Allele.NO_CALL);
|
||||
} else {
|
||||
Allele allele;
|
||||
if ( !isIndel ) {
|
||||
allele = new Allele(alleleString);
|
||||
} else if ( alleleString.equals(PlinkRod.SEQUENOM_NO_VARIANT) ) {
|
||||
allele = new Allele(alleleString, isInsertion);
|
||||
} else {
|
||||
allele = new Allele(alleleString, !isInsertion);
|
||||
if ( indelLength == 0 ) {
|
||||
indelLength = alleleString.length();
|
||||
loc = GenomeLocParser.parseGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+indelLength-1);
|
||||
}
|
||||
}
|
||||
|
||||
myAlleles.add(allele);
|
||||
if ( alleles.size() < 2 )
|
||||
alleles.add(allele);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// CONSTRUCTOR
|
||||
|
||||
public PlinkVariantInfo(String variantName) {
|
||||
this.variantName = variantName;
|
||||
parseName();
|
||||
}
|
||||
|
||||
public PlinkVariantInfo(String variantName, boolean onlyLookForIndelInfo ) {
|
||||
this.variantName = variantName;
|
||||
if ( onlyLookForIndelInfo ) {
|
||||
parseNameForIndels();
|
||||
} else {
|
||||
parseName();
|
||||
}
|
||||
}
|
||||
|
||||
private void parseName() {
|
||||
String chrom = this.variantName.split("\\|c")[1].split("_")[0];
|
||||
String pos = this.variantName.split("_p")[1].split("_")[0];
|
||||
this.loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
|
||||
this.parseNameForIndels();
|
||||
}
|
||||
|
||||
private void parseNameForIndels() {
|
||||
if ( this.variantName.contains("_gI") || this.variantName.contains("_gD") ) {
|
||||
this.instantiateIndel();
|
||||
}
|
||||
}
|
||||
|
||||
private void instantiateIndel() {
|
||||
this.indelHolder = new ArrayList<Allele>();
|
||||
this.sampleHolder = new ArrayList<String>();
|
||||
this.siteIndelLength = -1;
|
||||
this.indelType = this.variantName.contains("_gI") ? AlleleType.INSERTION : AlleleType.DELETION;
|
||||
}
|
||||
|
||||
public void addGenotypeEntry(String genotypeString, String sampleName) {
|
||||
// identify if we're dealing with a deletion
|
||||
if ( this.isSNP() ) {
|
||||
this.addSNP(genotypeString.split("\\s+"),sampleName);
|
||||
} else {
|
||||
this.addIndel(genotypeString.split("\\s+"),sampleName);
|
||||
}
|
||||
genotypes.add(new Genotype(sampleName, myAlleles, 20.0));
|
||||
}
|
||||
|
||||
public void addBinaryGenotypeEntry( int genoTYPE, String sampleName ) {
|
||||
|
|
@ -515,139 +526,7 @@ class PlinkVariantInfo implements Comparable {
|
|||
alleleStr[1] = "0";
|
||||
}
|
||||
|
||||
if ( this.isSNP() ) {
|
||||
this.addSNP(alleleStr,sampleName);
|
||||
} else {
|
||||
this.addIndel(alleleStr,sampleName);
|
||||
}
|
||||
}
|
||||
|
||||
private void addSNP(String[] alleleStrings, String sampleName) {
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
|
||||
for ( String alStr : alleleStrings ) {
|
||||
alleles.add(new Allele(alStr.getBytes()));
|
||||
}
|
||||
|
||||
genotypes.add(new Genotype(alleles,sampleName,20.0) );
|
||||
sampleNames.add(sampleName);
|
||||
}
|
||||
|
||||
private void addIndel(String[] alleleStrings, String sampleName) {
|
||||
String alleleStr1 = alleleStrings[0];
|
||||
String alleleStr2 = alleleStrings[1];
|
||||
if ( alleleStr1.contains("-") ^ alleleStr2.contains("-") ) {
|
||||
// heterozygous indel
|
||||
if ( alleleStr1.contains("-") ) {
|
||||
this.addHetIndel(alleleStr2,sampleName) ;
|
||||
} else {
|
||||
this.addHetIndel(alleleStr1,sampleName);
|
||||
}
|
||||
} else {
|
||||
this.addHomIndel(alleleStr1, alleleStr2, sampleName);
|
||||
}
|
||||
}
|
||||
|
||||
private void addHetIndel(String baseStr, String sampleName) {
|
||||
Allele ref;
|
||||
Allele alt;
|
||||
|
||||
if ( indelType == AlleleType.INSERTION ) {
|
||||
ref = new Allele("-",true);
|
||||
alt = new Allele(baseStr.getBytes(),false);
|
||||
} else {
|
||||
alt = new Allele("-",false);
|
||||
ref = new Allele(baseStr.getBytes(),true);
|
||||
}
|
||||
|
||||
// this.setIndelLength(alt,baseStr.length());
|
||||
|
||||
if ( ! indelHolder.isEmpty() ) {
|
||||
siteIndelLength = baseStr.length();
|
||||
this.addHeldIndels();
|
||||
}
|
||||
|
||||
Genotype indel = new Genotype(Arrays.asList(ref,alt), sampleName, 20.0);
|
||||
// this.setIndelGenotypeLength(indel,siteIndelLength);
|
||||
this.genotypes.add(indel);
|
||||
this.sampleNames.add(sampleName);
|
||||
}
|
||||
|
||||
private void addHomIndel(String strand1, String strand2, String sampleName) {
|
||||
Allele allele1;
|
||||
Allele allele2;
|
||||
boolean reference;
|
||||
if ( indelType == AlleleType.DELETION ) {
|
||||
if ( strand1.contains("-") ) {
|
||||
// homozygous deletion
|
||||
allele1 = new Allele("-",false);
|
||||
allele2 = new Allele("-",false);
|
||||
reference = false;
|
||||
} else { // homozygous reference at a deletion variant site
|
||||
allele1 = new Allele(strand1.getBytes(),true);
|
||||
allele2 = new Allele(strand2.getBytes(),true);
|
||||
reference = true;
|
||||
}
|
||||
} else {
|
||||
if ( strand1.contains("-") ) {
|
||||
// homozygous reference
|
||||
allele1 = new Allele("-",true);
|
||||
allele2 = new Allele("-",true);
|
||||
reference = true;
|
||||
} else {
|
||||
allele1 = new Allele(strand1.getBytes(),false);
|
||||
allele2 = new Allele(strand2.getBytes(),false);
|
||||
reference = false;
|
||||
}
|
||||
}
|
||||
|
||||
if ( reference ) {
|
||||
if ( ! indelHolder.isEmpty() ) {
|
||||
siteIndelLength = strand1.length();
|
||||
this.addHeldIndels();
|
||||
}
|
||||
}
|
||||
|
||||
if ( reference || siteIndelLength != -1 ) { // if we're ref or know the insertion/deletion length of the site
|
||||
Genotype gen = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
|
||||
// setIndelGenotypeLength(gen,siteIndelLength);
|
||||
this.genotypes.add(gen);
|
||||
this.sampleNames.add(sampleName);
|
||||
} else { // hold on the variants until we *do* know the in/del length at this site
|
||||
this.indelHolder.add(allele1);
|
||||
this.indelHolder.add(allele2);
|
||||
this.sampleHolder.add(sampleName);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* private void setIndelGenotypeLength(Genotype g, int length) {
|
||||
g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
|
||||
}
|
||||
|
||||
private void addHeldIndels() {
|
||||
Allele del1;
|
||||
Allele del2;
|
||||
int startingSize = indelHolder.size();
|
||||
for ( int i = 0; i < startingSize ; i+=2 ) {
|
||||
del1 = indelHolder.get(i);
|
||||
del2 = indelHolder.get(i+1);
|
||||
this.addIndelFromCache(del1,del2,sampleHolder.get(i/2));
|
||||
if ( indelHolder.size() != startingSize ) {
|
||||
throw new StingException("Halting algorithm -- possible infinite loop");
|
||||
}
|
||||
}
|
||||
indelHolder.clear();
|
||||
sampleHolder.clear();
|
||||
}
|
||||
|
||||
private void addIndelFromCache ( Allele indel1, Allele indel2, String sampleName ) {
|
||||
this.setIndelLength(indel1,siteIndelLength);
|
||||
this.setIndelLength(indel2,siteIndelLength);
|
||||
Genotype indel = new Genotype(Arrays.asList(indel1,indel2),sampleName, 20.0);
|
||||
// this.setIndelGenotypeLength(indel,siteIndelLength);
|
||||
this.genotypes.add(indel);
|
||||
this.sampleNames.add(sampleName);
|
||||
addGenotypeEntry(alleleStr, sampleName);
|
||||
}
|
||||
|
||||
public int compareTo(Object obj) {
|
||||
|
|
@ -657,22 +536,14 @@ class PlinkVariantInfo implements Comparable {
|
|||
|
||||
return loc.compareTo(((PlinkVariantInfo) obj).getLocation());
|
||||
}
|
||||
|
||||
private void setIndelLength(Allele al, int length) {
|
||||
// Todo -- once alleles support deletion lengths add that information
|
||||
// Todo -- into the object; for now this can just return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
class PlinkBinaryTrifecta {
|
||||
|
||||
public PlinkBinaryTrifecta() {
|
||||
|
||||
}
|
||||
public PlinkBinaryTrifecta() {}
|
||||
|
||||
public File bedFile;
|
||||
public File bimFile;
|
||||
public File famFile;
|
||||
|
||||
} */
|
||||
}
|
||||
|
|
@ -111,6 +111,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
addModule("PicardDbSNP", rodPicardDbSNP.class);
|
||||
addModule("HapmapVCF", HapmapVCFROD.class);
|
||||
addModule("Beagle", BeagleROD.class);
|
||||
addModule("Plink", PlinkRod.class);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -366,7 +366,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
*/
|
||||
@Override
|
||||
public Genotype getCalledGenotype() {
|
||||
return new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb);
|
||||
return new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -377,7 +377,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> ret = new ArrayList<Genotype>();
|
||||
ret.add(new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb));
|
||||
ret.add(new GeliGenotypeCall(Character.toString(refBase), getLocation(), bestGenotype, lodBtnb));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -275,7 +275,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
*/
|
||||
@Override
|
||||
public Genotype getCalledGenotype() {
|
||||
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
return new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -286,7 +286,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> ret = new ArrayList<Genotype>();
|
||||
ret.add(new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence()));
|
||||
ret.add(new BasicGenotype(this.getLocation(),this.feature,Character.toString(this.getRefSnpFWD()),this.getConsensusConfidence()));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -196,14 +196,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
public static VCFRecord toVCF(VariantContext vc) {
|
||||
// deal with the reference
|
||||
char referenceBase = 'N'; // by default we'll use N
|
||||
if ( vc.getReference().length() == 1 ) {
|
||||
referenceBase = (char)vc.getReference().getBases()[0];
|
||||
}
|
||||
|
||||
if ( ! vc.isSNP() )
|
||||
// todo -- update the code so it works correctly with indels
|
||||
throw new StingException("VariantContext -> VCF converter currently doesn't support indels; complain to the GATK team");
|
||||
String referenceBase = new String(vc.getReference().getBases());
|
||||
|
||||
String contig = vc.getLocation().getContig();
|
||||
long position = vc.getLocation().getStart();
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
// create the call
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, Character.toString(ref), loc);
|
||||
|
||||
// set the genotype and confidence
|
||||
Pair<Integer, Double> AFbasedGenotype = matrix.getGenotype(frequency, sample);
|
||||
|
|
|
|||
|
|
@ -347,7 +347,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
private RodVCF fakeVCFForSample(RodVCF eval, ReferenceContext ref, final String sampleName) {
|
||||
VCFGenotypeRecord genotype = (VCFGenotypeRecord)eval.getGenotype(sampleName);
|
||||
if ( genotype.getNegLog10PError() > 0 ) {
|
||||
VCFRecord record = new VCFRecord(ref.getBase(), ref.getLocus(), "GT");
|
||||
VCFRecord record = new VCFRecord(Character.toString(ref.getBase()), ref.getLocus(), "GT");
|
||||
record.setAlternateBases(eval.getRecord().getAlternateAlleles());
|
||||
record.addGenotypeRecord(genotype);
|
||||
record.setQual(10*genotype.getNegLog10PError());
|
||||
|
|
|
|||
|
|
@ -83,9 +83,9 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
|
|||
Genotype parent1call = parent1.genotypes.get(0);
|
||||
Genotype parent2call = parent2.genotypes.get(0);
|
||||
|
||||
if (!parent1call.isVariant(parent1call.getReference()) &&
|
||||
if (!parent1call.isVariant(parent1call.getReference().charAt(0)) &&
|
||||
parent1call.getNegLog10PError() > 5 &&
|
||||
!parent2call.isVariant(parent2call.getReference()) &&
|
||||
!parent2call.isVariant(parent2call.getReference().charAt(0)) &&
|
||||
parent2call.getNegLog10PError() > 5) {
|
||||
|
||||
double sumConfidences = 0.5 * (0.5 * child.getNegLog10PError() +
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
|
|||
VariantCallContext calls = UG.map(tracker, ref, subContext);
|
||||
if (calls != null && calls.genotypes != null && calls.genotypes.size() > 0) {
|
||||
Genotype call = calls.genotypes.get(0);
|
||||
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
String callType = (call.isVariant(call.getReference().charAt(0))) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
||||
}
|
||||
}
|
||||
|
|
@ -113,7 +113,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
|
|||
double nextVrsBest = 0;
|
||||
double nextVrsRef = 0;
|
||||
|
||||
char ref = locus.getReference();
|
||||
char ref = locus.getReference().charAt(0);
|
||||
|
||||
if (locus instanceof ReadBacked) {
|
||||
readDepth = ((ReadBacked)locus).getReadCount();
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class HapMap2VCF extends RodWalker<Integer, Integer> {
|
|||
VCFGenotypeEncoding refAllele = new VCFGenotypeEncoding(ref_allele.toString());
|
||||
|
||||
// Create a new record
|
||||
VCFRecord record = new VCFRecord(ref_allele, context.getLocation(), "GT");
|
||||
VCFRecord record = new VCFRecord(Character.toString(ref_allele), context.getLocation(), "GT");
|
||||
|
||||
// Record each sample's genotype info
|
||||
String[] hapmap_rod_genotypes = hapmap_rod.getGenotypes();
|
||||
|
|
|
|||
|
|
@ -2,14 +2,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.HardyWeinberg;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.PlinkRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
|
|
@ -21,9 +22,7 @@ import java.util.*;
|
|||
/**
|
||||
* Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
|
||||
*/
|
||||
public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
||||
@Argument(fullName="sequenomePedFile", shortName="sPed", doc="The sequenome file from which to generate a VCF", required=true)
|
||||
public File seqFile = null;
|
||||
public class PlinkToVCF extends RodWalker<VCFRecord,Integer> {
|
||||
@Argument(fullName="outputVCF", shortName="vcf", doc="The VCF file to write to", required=true)
|
||||
public File vcfFile = null;
|
||||
@Argument(fullName="numberOfSamples", shortName="ns", doc="The number of samples sequenced", required=false)
|
||||
|
|
@ -44,7 +43,6 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
"FID","IID","PAT","MAT","SEX","PHENOTYPE"));
|
||||
private final int INIT_NUMBER_OF_POPULATIONS = 10;
|
||||
private final int DEFAULT_QUALITY = 20;
|
||||
private HashMap<String, SequenomVariantInfo> sequenomResults = new HashMap<String,SequenomVariantInfo>();
|
||||
private ArrayList<String> sampleNames = new ArrayList<String>(nSamples);
|
||||
private VCFGenotypeWriterAdapter vcfWriter;
|
||||
private final HardyWeinberg HWCalc = new HardyWeinberg();
|
||||
|
|
@ -52,17 +50,15 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
private HashMap<String,String> samplesToPopulation;
|
||||
|
||||
public void initialize() {
|
||||
//System.out.println("Initializing... parse sequenom file");
|
||||
parseSequenomFile(seqFile);
|
||||
vcfWriter = new VCFGenotypeWriterAdapter(vcfFile);
|
||||
if ( useSmartHardy ) {
|
||||
samplesToPopulation = parsePopulationFile(popFile);
|
||||
}
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("source", "Sequenom2VCF"));
|
||||
hInfo.add(new VCFHeaderLine("source", "PlinkToVCF"));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
vcfWriter.writeHeader(new TreeSet<String>(sampleNames),hInfo);
|
||||
vcfWriter.writeHeader(new TreeSet<String>(sampleNames), hInfo);
|
||||
nSamples = sampleNames.size();
|
||||
}
|
||||
|
||||
|
|
@ -72,21 +68,32 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
}
|
||||
|
||||
public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( sequenomResults.containsKey(context.getLocation().toString()) ) {
|
||||
SequenomVariantInfo varInfo = sequenomResults.remove(context.getLocation().toString());
|
||||
return addVariantInformationToCall(ref,varInfo);
|
||||
} else {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
// get the Plink rod at this locus if there is one
|
||||
PlinkRod plinkRod = null;
|
||||
Iterator<ReferenceOrderedDatum> rods = tracker.getAllRods().iterator();
|
||||
while (rods.hasNext()) {
|
||||
ReferenceOrderedDatum rod = rods.next();
|
||||
if ( rod instanceof PlinkRod ) {
|
||||
plinkRod = (PlinkRod)rod;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( plinkRod == null )
|
||||
return null;
|
||||
|
||||
return addVariantInformationToCall(ref, plinkRod);
|
||||
}
|
||||
|
||||
public Integer reduce(VCFRecord call, Integer numVariants) {
|
||||
if ( call == null ) {
|
||||
return numVariants;
|
||||
} else {
|
||||
if ( call != null ) {
|
||||
numVariants++;
|
||||
printToVCF(call);
|
||||
return 1 + numVariants;
|
||||
}
|
||||
return numVariants;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer finalReduce) {
|
||||
|
|
@ -106,51 +113,25 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private VCFRecord addVariantInformationToCall(ReferenceContext ref, SequenomVariantInfo varInfo) {
|
||||
int numNoCalls = 0;
|
||||
int numHomNonrefCalls = 0;
|
||||
int numNonrefAlleles = 0;
|
||||
private VCFRecord addVariantInformationToCall(ReferenceContext ref, PlinkRod plinkRod) {
|
||||
|
||||
int sampleNumber = 0;
|
||||
VCFRecord record = new VCFRecord(ref.getBase(),ref.getLocus(),"GT");
|
||||
VariantContext vContext = plinkRod.getVariantContext();
|
||||
VCFRecord record = VariantContextAdaptors.toVCF(vContext);
|
||||
record.setGenotypeFormatString("GT");
|
||||
|
||||
for ( String genTypeStr : varInfo.getGenotypes() ) {
|
||||
if ( genTypeStr.indexOf("0") == -1 ) {
|
||||
VCFGenotypeEncoding allele1 = new VCFGenotypeEncoding(genTypeStr.substring(0,1));
|
||||
VCFGenotypeEncoding allele2 = new VCFGenotypeEncoding(genTypeStr.substring(1));
|
||||
List<VCFGenotypeEncoding> alleles = new ArrayList<VCFGenotypeEncoding>(2);
|
||||
alleles.add(allele1);
|
||||
alleles.add(allele2);
|
||||
int numNoCalls = vContext.getNoCallCount();
|
||||
int numHomVarCalls = vContext.getHomVarCount();
|
||||
int numHetCalls = vContext.getHetCount();
|
||||
|
||||
VCFGenotypeRecord genotype = new VCFGenotypeRecord(sampleNames.get(sampleNumber), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
|
||||
genotype.setField("GQ",String.format("%d",DEFAULT_QUALITY));
|
||||
|
||||
if ( genotype.isVariant(ref.getBase()) ) {
|
||||
if ( genotype.isHom() ) {
|
||||
numHomNonrefCalls++;
|
||||
numNonrefAlleles+=2;
|
||||
record.addAlternateBase(allele1);
|
||||
} else {
|
||||
numNonrefAlleles++;
|
||||
record.addAlternateBase(allele1.getBases().equalsIgnoreCase(String.format("%c",ref.getBase())) ? allele2 : allele1);
|
||||
}
|
||||
}
|
||||
|
||||
record.addGenotypeRecord(genotype);
|
||||
|
||||
} else {
|
||||
numNoCalls++;
|
||||
}
|
||||
sampleNumber++;
|
||||
}
|
||||
|
||||
double noCallProp = ( (double) numNoCalls )/( (double) sampleNames.size());
|
||||
double homNonRProp = ( (double) numHomNonrefCalls )/( (double) sampleNames.size() - numNoCalls);
|
||||
double homNonRProp = ( (double) numHomVarCalls )/( (double) sampleNames.size() - numNoCalls);
|
||||
|
||||
record.setQual(DEFAULT_QUALITY);
|
||||
String hw = hardyWeinbergCalculation(ref,record);
|
||||
double hwScore = hw != null ? Double.valueOf(hw) : -0.0;
|
||||
record.addInfoFields(generateInfoField(record, numNoCalls,numHomNonrefCalls,numNonrefAlleles,ref, varInfo, hwScore));
|
||||
// TODO -- record.addInfoFields(generateInfoField(record, numNoCalls,numHomVarCalls,numNonrefAlleles,ref, plinkRod, hwScore));
|
||||
if ( hwScore > maxHardy ) {
|
||||
record.setFilterString("Hardy-Weinberg");
|
||||
} else if ( noCallProp > maxNoCall ) {
|
||||
|
|
@ -174,7 +155,7 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
}
|
||||
|
||||
private Map<String,String> generateInfoField(VCFRecord rec, int nocall, int homnonref, int allnonref,
|
||||
ReferenceContext ref, SequenomVariantInfo info, double hwScore) {
|
||||
ReferenceContext ref, PlinkRod info, double hwScore) {
|
||||
double propNoCall = ( ( double ) nocall / (double) nSamples );
|
||||
double propHomNR = ( (double) homnonref / (double) nSamples );
|
||||
HashMap<String,String> infoMap = new HashMap<String,String>(1);
|
||||
|
|
@ -206,13 +187,13 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
for ( String name : sampleNames ) {
|
||||
String pop = samplesToPopulation.get(name);
|
||||
if ( rec.getGenotype(name) != null ) {
|
||||
genotypesByPopulation.get(pop).add(rec.getGenotype(name));
|
||||
// TODO -- genotypesByPopulation.get(pop).add(rec.getGenotype(name));
|
||||
}
|
||||
}
|
||||
|
||||
for ( String population : samplesToPopulation.values() ) {
|
||||
VCFVariationCall v = new VCFVariationCall(ref.getBase(),ref.getLocus(),VCFVariationCall.VARIANT_TYPE.SNP);
|
||||
v.setGenotypeCalls(genotypesByPopulation.get(population));
|
||||
// TODO -- v.setGenotypeCalls(genotypesByPopulation.get(population));
|
||||
hardyWeinbergByPopulation.put(population,HWCalc.annotate(null,ref,null,v));
|
||||
}
|
||||
|
||||
|
|
@ -229,111 +210,6 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
return String.format("%s",maxH);
|
||||
}
|
||||
|
||||
private void parseSequenomFile(File sequenomFile) {
|
||||
BufferedReader reader;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(sequenomFile));
|
||||
} catch ( IOException e ) {
|
||||
throw new StingException("Sequenom file could not be opened",e);
|
||||
}
|
||||
String header;
|
||||
try {
|
||||
header = reader.readLine();
|
||||
//System.out.println("Read header line, it was "+header);
|
||||
} catch (IOException e) {
|
||||
throw new StingException(e.getMessage(),e);
|
||||
}
|
||||
HashMap<Integer,SequenomVariantInfo> sequenomVariants = parseSequenomHeader(header);
|
||||
try {
|
||||
String line;
|
||||
do {
|
||||
line = reader.readLine();
|
||||
//ystem.out.println("Read line, it was"+line);
|
||||
if ( line != null ) {
|
||||
//System.out.println("Parsing line...");
|
||||
parseSequenomLine(sequenomVariants,line);
|
||||
}
|
||||
} while ( line != null);
|
||||
|
||||
reader.close();
|
||||
convertToLocusMap(sequenomVariants);
|
||||
|
||||
} catch ( IOException e) {
|
||||
throw new StingException("Error reading sequenom file", e);
|
||||
}
|
||||
}
|
||||
|
||||
private HashMap<Integer,SequenomVariantInfo> parseSequenomHeader(String header) {
|
||||
// the header will give us all the probe names and contigs
|
||||
//System.out.println("Parsing header");
|
||||
HashMap<Integer,SequenomVariantInfo> variantsByFileOffset = new HashMap<Integer,SequenomVariantInfo>();
|
||||
String[] fields = header.split("\t");
|
||||
int fieldOffset = 0;
|
||||
for ( String entry : fields ) {
|
||||
if ( ! HEADER_FIELDS.contains(entry) ) {
|
||||
//System.out.println(entry);
|
||||
// actually a SNP
|
||||
String snpName = entry.split("\\|")[1];
|
||||
//System.out.println("Entry: "+entry+" Name: "+snpName);
|
||||
variantsByFileOffset.put(fieldOffset,new SequenomVariantInfo(snpName,nSamples));
|
||||
//System.out.println("Adding entry for offset "+fieldOffset);
|
||||
}
|
||||
fieldOffset++;
|
||||
}
|
||||
|
||||
return variantsByFileOffset;
|
||||
}
|
||||
|
||||
private void parseSequenomLine(HashMap<Integer,SequenomVariantInfo> variants, String line) {
|
||||
if ( line == null ) {
|
||||
// early return
|
||||
//System.out.println("Line was null in file");
|
||||
return;
|
||||
}
|
||||
String[] entries = line.split("\t");
|
||||
int offset = 0;
|
||||
if ( entries[1].equalsIgnoreCase("empty")) {
|
||||
return;
|
||||
}
|
||||
sampleNames.add(entries[1]);
|
||||
for ( String entry : entries ) {
|
||||
if ( variants.containsKey(offset) ) { // actual SNP
|
||||
variants.get(offset).addGenotype(entry);
|
||||
//System.out.println("Added: "+entry+"To "+offset);
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
private void convertToLocusMap(HashMap<Integer,SequenomVariantInfo> variants) {
|
||||
for ( SequenomVariantInfo variant : variants.values() ) {
|
||||
//System.out.println("Variant name: "+variant.getName());
|
||||
String loc = genomeLocFromVariantName(variant.getName());
|
||||
//System.out.println(variant.getName());
|
||||
if ( loc == null ) {
|
||||
throw new StingException("Genome locus was null");
|
||||
}
|
||||
sequenomResults.put(loc,variant);
|
||||
}
|
||||
|
||||
variants.clear();
|
||||
variants = null;
|
||||
}
|
||||
|
||||
private String genomeLocFromVariantName(String name) {
|
||||
String[] nameInfo = name.split("_");
|
||||
//System.out.println(name);
|
||||
//System.out.println(nameInfo[0]);
|
||||
String chr = nameInfo[0].substring(1); // format: c3 becomes 3
|
||||
String pos = nameInfo[1].substring(1); // format: p9961104 becomes 9961104
|
||||
if ( useb36contigs ) {
|
||||
return chr+":"+pos;
|
||||
} else {
|
||||
return "chr"+chr+":"+pos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private HashMap<String,String> parsePopulationFile(File file) {
|
||||
HashMap<String,String> samplesToPopulation = new HashMap<String,String>();
|
||||
try {
|
||||
|
|
@ -352,29 +228,4 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
|
|||
|
||||
return samplesToPopulation;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
class SequenomVariantInfo {
|
||||
private ArrayList<String> genotypes;
|
||||
private String variantName;
|
||||
|
||||
public SequenomVariantInfo(String name, int nPeople) {
|
||||
genotypes = new ArrayList<String>(nPeople);
|
||||
variantName = name;
|
||||
}
|
||||
|
||||
public void addGenotype(String genotype) {
|
||||
String[] alleles = genotype.split(" ");
|
||||
genotypes.add(alleles[0]+alleles[1]);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return variantName;
|
||||
}
|
||||
|
||||
public ArrayList<String> getGenotypes() {
|
||||
return genotypes;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -186,7 +186,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
if (dbsnp != null) info.put("DB","1");
|
||||
if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1");
|
||||
|
||||
return new VCFRecord(ref.getBase(),
|
||||
return new VCFRecord(Character.toString(ref.getBase()),
|
||||
context.getContig(),
|
||||
(int) context.getPosition(),
|
||||
(dbsnp == null) ? "." : dbsnp.getRS_ID(),
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
}
|
||||
}
|
||||
|
||||
VCFRecord subset = new VCFRecord(record.getReferenceBase(),
|
||||
VCFRecord subset = new VCFRecord(record.getReference(),
|
||||
record.getLocation().getContig(),
|
||||
(int) record.getLocation().getStart(),
|
||||
record.getID(),
|
||||
|
|
@ -110,7 +110,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
|
||||
boolean isVariant = false;
|
||||
for (VCFGenotypeEncoding ge : ((VCFGenotypeRecord)subset.getGenotypes().get(0)).getAlleles()) {
|
||||
if (record.getReferenceBase() != ge.getBases().charAt(0)) {
|
||||
if (!record.getReference().equals(ge.getBases())) {
|
||||
isVariant = true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class VCFCallRates extends CommandLineProgram
|
|||
record = reader.next();
|
||||
}
|
||||
|
||||
char ref = record.getReferenceBase();
|
||||
char ref = record.getReference().charAt(0);
|
||||
|
||||
String[] new_sample_names = record.getSampleNames();
|
||||
if (new_sample_names.length != sample_names.length) { throw new RuntimeException(); }
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ class VCFSequenomAnalysis extends CommandLineProgram
|
|||
continue;
|
||||
}
|
||||
|
||||
char ref = record1.getReferenceBase();
|
||||
char ref = record1.getReference().charAt(0);
|
||||
char alt = VCFTool.getAlt(record2);
|
||||
|
||||
int n_total_sequenom = VCFTool.Compute_n_total(record1);
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
|
||||
String[] sample_names = record2.getSampleNames();
|
||||
|
||||
char ref = record1.getReferenceBase();
|
||||
char ref = record1.getReference().charAt(0);
|
||||
char alt = VCFTool.getAlt(record2);
|
||||
|
||||
int n_total_sequenom = VCFTool.Compute_n_total(record1, sample_names);
|
||||
|
|
@ -211,8 +211,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
}
|
||||
if (n_alt != 1) { throw new RuntimeException(); }
|
||||
if (singleton_name.equals("")) { throw new RuntimeException(); }
|
||||
|
|
@ -234,8 +234,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; singleton_name = sample_names[i]; }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -263,8 +263,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == sequencing.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == sequencing.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (n_alt == 1) { het_samples.add(sample_names[i]); }
|
||||
}
|
||||
|
||||
|
|
@ -291,8 +291,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { dropped_hets += 1; continue; }
|
||||
if (g.charAt(0) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == sequenom.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == sequenom.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (n_alt == 1) { matched_hets += 1; }
|
||||
else { mismatched_hets += 1; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ class CheckRefFields extends CommandLineProgram
|
|||
}
|
||||
|
||||
long offset = record.getLocation().getStart();
|
||||
char vcf_ref_base = record.getReferenceBase();
|
||||
char vcf_ref_base = record.getReference().charAt(0);
|
||||
char fasta_ref_base = (char)ref_seq[(int)offset-1];
|
||||
|
||||
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
|
||||
|
|
@ -338,7 +338,7 @@ class FixRefFields extends CommandLineProgram
|
|||
}
|
||||
|
||||
long offset = record.getLocation().getStart();
|
||||
char vcf_ref_base = record.getReferenceBase();
|
||||
char vcf_ref_base = record.getReference().charAt(0);
|
||||
char fasta_ref_base = (char)ref_seq[(int)offset-1];
|
||||
|
||||
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
|
||||
|
|
@ -524,7 +524,7 @@ class PrintGQ extends CommandLineProgram
|
|||
record = reader.next();
|
||||
}
|
||||
|
||||
char ref = record.getReferenceBase();
|
||||
char ref = record.getReference().charAt(0);
|
||||
|
||||
String[] sample_names = record.getSampleNames();
|
||||
|
||||
|
|
@ -617,7 +617,7 @@ class VCFSimpleStats extends CommandLineProgram
|
|||
record1 = reader1.next();
|
||||
}
|
||||
|
||||
char ref = record1.getReferenceBase();
|
||||
char ref = record1.getReference().charAt(0);
|
||||
|
||||
|
||||
String[] sample_names1 = record1.getSampleNames();
|
||||
|
|
@ -845,7 +845,7 @@ class VCFConcordance extends CommandLineProgram
|
|||
}
|
||||
|
||||
|
||||
char ref = record1.getReferenceBase();
|
||||
char ref = record1.getReference().charAt(0);
|
||||
|
||||
String[] sample_names1 = record1.getSampleNames();
|
||||
String[] sample_names2 = record2.getSampleNames();
|
||||
|
|
@ -1260,7 +1260,7 @@ public class VCFTool
|
|||
|
||||
public static boolean isTransition(VCFRecord record)
|
||||
{
|
||||
char ref = record.getReferenceBase();
|
||||
char ref = record.getReference().charAt(0);
|
||||
List<VCFGenotypeEncoding> alleles = record.getAlternateAlleles();
|
||||
char alt = alleles.get(0).getBases().charAt(0);
|
||||
|
||||
|
|
@ -1315,8 +1315,8 @@ public class VCFTool
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
}
|
||||
return n_alt + n_ref;
|
||||
}
|
||||
|
|
@ -1359,8 +1359,8 @@ public class VCFTool
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
}
|
||||
return n_alt;
|
||||
}
|
||||
|
|
@ -1406,8 +1406,8 @@ public class VCFTool
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (n_alt == 1) { n_het += 1; }
|
||||
}
|
||||
return n_het;
|
||||
|
|
@ -1476,8 +1476,8 @@ public class VCFTool
|
|||
Arrays.sort(c);
|
||||
g = new String(c);
|
||||
if (g.equals("..")) { continue; }
|
||||
if (g.charAt(0) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReferenceBase()) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(0) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
if (g.charAt(1) == record.getReference().charAt(0)) { n_ref += 1; } else { n_alt += 1; }
|
||||
|
||||
if (n_ref == 2) { ref += 1; }
|
||||
else if (n_ref == 1 && n_alt == 1) { het += 1; }
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ public abstract class AlleleConstrainedGenotype implements Genotype {
|
|||
private char ref = NO_CONSTRAINT;
|
||||
private char alt = NO_CONSTRAINT;
|
||||
|
||||
public AlleleConstrainedGenotype(char ref) {
|
||||
this.ref = ref;
|
||||
public AlleleConstrainedGenotype(String ref) {
|
||||
this.ref = ref.charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -24,8 +24,8 @@ public class BasicGenotype implements Genotype {
|
|||
// our location
|
||||
private GenomeLoc mLocation;
|
||||
|
||||
// the reference base.
|
||||
private char mRef;
|
||||
// the reference bases
|
||||
private String mRef;
|
||||
|
||||
// the confidence score
|
||||
private double mNegLog10PError;
|
||||
|
|
@ -38,7 +38,7 @@ public class BasicGenotype implements Genotype {
|
|||
* @param ref the reference base as a char
|
||||
* @param negLog10PError the confidence score
|
||||
*/
|
||||
public BasicGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError) {
|
||||
public BasicGenotype(GenomeLoc location, String genotype, String ref, double negLog10PError) {
|
||||
mNegLog10PError = negLog10PError;
|
||||
|
||||
for ( char base : genotype.toCharArray() ) {
|
||||
|
|
@ -56,7 +56,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return the negitive log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return mNegLog10PError;
|
||||
}
|
||||
|
|
@ -66,7 +65,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getBases() {
|
||||
return mGenotype;
|
||||
}
|
||||
|
|
@ -76,7 +74,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return mGenotype.length();
|
||||
}
|
||||
|
|
@ -86,7 +83,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHom() {
|
||||
if (mGenotype.length() < 1)
|
||||
return false;
|
||||
|
|
@ -106,7 +102,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHet() {
|
||||
if (mGenotype.length() < 1)
|
||||
return false;
|
||||
|
|
@ -118,7 +113,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return a GenomeLoc representing the location
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
}
|
||||
|
|
@ -128,7 +122,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -140,7 +133,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariant(char ref) {
|
||||
return !(mGenotype.charAt(0) == ref && isHom());
|
||||
}
|
||||
|
|
@ -150,8 +142,7 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return a character, representing the reference base
|
||||
*/
|
||||
@Override
|
||||
public char getReference() {
|
||||
public String getReference() {
|
||||
return mRef;
|
||||
}
|
||||
|
||||
|
|
@ -160,7 +151,6 @@ public class BasicGenotype implements Genotype {
|
|||
*
|
||||
* @return the variant
|
||||
*/
|
||||
@Override
|
||||
public Variation toVariation(char ref) {
|
||||
if (!isVariant(ref)) throw new IllegalStateException("this genotype is not a variant");
|
||||
return new BasicVariation(this.getBases(), String.valueOf(ref), this.getBases().length(), mLocation, mNegLog10PError);
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public interface Genotype {
|
|||
* get the reference base.
|
||||
* @return a character, representing the reference base
|
||||
*/
|
||||
public char getReference();
|
||||
public String getReference();
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class GenotypeWriterFactory {
|
|||
* @param loc the location
|
||||
* @return an unpopulated genotype call object
|
||||
*/
|
||||
public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
|
||||
public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, String ref, GenomeLoc loc) {
|
||||
switch (format) {
|
||||
case VCF:
|
||||
return new VCFGenotypeCall(ref, loc);
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ public class GeliAdapter implements GeliGenotypeWriter {
|
|||
*
|
||||
* @param fileHeader the file header to write out
|
||||
*/
|
||||
@Override
|
||||
public void writeHeader(final SAMFileHeader fileHeader) {
|
||||
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
|
||||
}
|
||||
|
|
@ -131,7 +130,7 @@ public class GeliAdapter implements GeliGenotypeWriter {
|
|||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
char ref = gCall.getReference().charAt(0);
|
||||
int readCount = gCall.getReadCount();
|
||||
double maxMappingQual = 0;
|
||||
if ( gCall.getPileup() != null ) {
|
||||
|
|
|
|||
|
|
@ -34,9 +34,9 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
|
|||
* @param ref the ref character
|
||||
* @param loc the genome loc
|
||||
*/
|
||||
public GeliGenotypeCall(char ref, GenomeLoc loc) {
|
||||
public GeliGenotypeCall(String ref, GenomeLoc loc) {
|
||||
super(ref);
|
||||
mRefBase = ref;
|
||||
mRefBase = ref.charAt(0);
|
||||
mLocation = loc;
|
||||
|
||||
// fill in empty data
|
||||
|
|
@ -44,12 +44,12 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
|
|||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
}
|
||||
|
||||
public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) {
|
||||
public GeliGenotypeCall(String ref, GenomeLoc loc, String genotype, double negLog10PError) {
|
||||
super(ref);
|
||||
mRefBase = ref;
|
||||
mRefBase = ref.charAt(0);
|
||||
mLocation = loc;
|
||||
mBestGenotype = DiploidGenotype.valueOf(genotype);
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(mRefBase);
|
||||
mNextGenotype = mRefGenotype;
|
||||
|
||||
// set general posteriors to min double value
|
||||
|
|
@ -114,7 +114,7 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
|
|||
|
||||
private void lazyEval() {
|
||||
if (mBestGenotype == null) {
|
||||
char ref = this.getReference();
|
||||
char ref = this.getReference().charAt(0);
|
||||
char alt = this.getAlternateAllele();
|
||||
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
|
|
@ -285,8 +285,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
|
|||
*
|
||||
* @return the reference character
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
public String getReference() {
|
||||
return Character.toString(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class GeliTextWriter implements GeliGenotypeWriter {
|
|||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
char ref = gCall.getReference().charAt(0);
|
||||
|
||||
double[] posteriors = gCall.getPosteriors();
|
||||
double[] lks;
|
||||
|
|
|
|||
|
|
@ -33,9 +33,9 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
|
|||
* @param ref the ref character
|
||||
* @param loc the genome loc
|
||||
*/
|
||||
public GLFGenotypeCall(char ref, GenomeLoc loc) {
|
||||
mRefBase = ref;
|
||||
mGenotype = Utils.dupString(ref, 2);
|
||||
public GLFGenotypeCall(String ref, GenomeLoc loc) {
|
||||
mRefBase = ref.charAt(0);
|
||||
mGenotype = Utils.dupString(mRefBase, 2);
|
||||
|
||||
// fill in empty data
|
||||
mLocation = loc;
|
||||
|
|
@ -192,8 +192,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
|
|||
*
|
||||
* @return the reference character
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
public String getReference() {
|
||||
return Character.toString(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ public class GLFWriter implements GLFGenotypeWriter {
|
|||
throw new IllegalArgumentException("Only GLFGenotypeCall should be passed in to the GLF writers");
|
||||
GLFGenotypeCall gCall = (GLFGenotypeCall) call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
char ref = gCall.getReference().charAt(0);
|
||||
|
||||
// get likelihood information if available
|
||||
LikelihoodObject obj = new LikelihoodObject(gCall.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
double likelihoods[];
|
||||
int readDepth = -1;
|
||||
|
|
@ -56,7 +55,7 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
|
||||
|
||||
}
|
||||
char ref = locus.getReference();
|
||||
char ref = locus.getReference().charAt(0);
|
||||
|
||||
if (locus instanceof ReadBacked) {
|
||||
readDepth = ((ReadBacked)locus).getReadCount();
|
||||
|
|
@ -84,13 +83,11 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param position
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
throw new StingException("TabularLFWriter doesn't support no-calls");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
if (this.outStream != null) {
|
||||
outStream.close();
|
||||
|
|
@ -103,13 +100,11 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param genotypes the list of genotypes, that are backed by sample information
|
||||
*/
|
||||
@Override
|
||||
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata) {
|
||||
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
|
||||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
* The implementation of the genotype interface, specific to VCF
|
||||
*/
|
||||
public class VCFGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, SampleBacked, Cloneable {
|
||||
private final char mRefBase;
|
||||
private final String mRef;
|
||||
private final GenomeLoc mLocation;
|
||||
|
||||
private ReadBackedPileup mPileup = null;
|
||||
|
|
@ -28,19 +28,19 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
// the sample name, used to propulate the SampleBacked interface
|
||||
private String mSampleName;
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc) {
|
||||
public VCFGenotypeCall(String ref, GenomeLoc loc) {
|
||||
super(ref);
|
||||
mRefBase = ref;
|
||||
mRef = ref;
|
||||
mLocation = loc;
|
||||
|
||||
// fill in empty data
|
||||
mGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mGenotype = DiploidGenotype.createHomGenotype(ref.charAt(0));
|
||||
mSampleName = "";
|
||||
}
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
|
||||
public VCFGenotypeCall(String ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
|
||||
super(ref);
|
||||
mRefBase = ref;
|
||||
mRef = ref;
|
||||
mLocation = loc;
|
||||
mGenotype = genotype;
|
||||
mNegLog10PError = negLog10PError;
|
||||
|
|
@ -80,12 +80,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
return mGenotype.equals(otherCall.mGenotype) &&
|
||||
mNegLog10PError == otherCall.mNegLog10PError &&
|
||||
mLocation.equals(otherCall.mLocation) &&
|
||||
mRefBase == otherCall.mRefBase;
|
||||
mRef.equals(otherCall.mRef);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError());
|
||||
getLocation(), mGenotype, mRef, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -168,7 +168,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
return isVariant(mRefBase);
|
||||
return isVariant(mRef.charAt(0));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -209,12 +209,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
}
|
||||
|
||||
/**
|
||||
* get the reference character
|
||||
* get the reference string
|
||||
*
|
||||
* @return the reference character
|
||||
* @return the reference string
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
public String getReference() {
|
||||
return mRef;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -139,8 +139,8 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
return mRecord != null ? mRecord.getLocation() : null;
|
||||
}
|
||||
|
||||
public char getReference() {
|
||||
return mRecord != null ? mRecord.getReferenceBase() : 'N';
|
||||
public String getReference() {
|
||||
return mRecord != null ? mRecord.getReference() : "N";
|
||||
}
|
||||
|
||||
public Variation toVariation(char ref) {
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
if ( locusdata == null )
|
||||
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have variation data");
|
||||
|
||||
params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0));
|
||||
params.setLocations(locusdata.getLocation(), locusdata.getReference());
|
||||
|
||||
// if there is no genotype data, we'll also need to set an alternate allele
|
||||
if ( locusdata.isBiallelic() && locusdata.isSNP() )
|
||||
|
|
@ -161,7 +161,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
if ( locusdata != null )
|
||||
dbSnpID = ((VCFVariationCall)locusdata).getID();
|
||||
|
||||
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
|
||||
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBases(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
(dbSnpID == null ? VCFRecord.EMPTY_ID_FIELD : dbSnpID),
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import java.util.ArrayList;
|
|||
* we feed to the VCF (like ensuring the same position for each genotype in a call).
|
||||
*/
|
||||
class VCFParameters {
|
||||
private char referenceBase = '0';
|
||||
private String referenceBases = "0";
|
||||
private int position = 0;
|
||||
private String contig = null;
|
||||
private boolean initialized = false;
|
||||
|
|
@ -21,7 +21,7 @@ class VCFParameters {
|
|||
private List<VCFGenotypeEncoding> alternateBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
private List<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
|
||||
public void setLocations(GenomeLoc location, char refBase) {
|
||||
public void setLocations(GenomeLoc location, String refBases) {
|
||||
// if we haven't set it up, we initialize the object
|
||||
if (!initialized) {
|
||||
initialized = true;
|
||||
|
|
@ -30,14 +30,14 @@ class VCFParameters {
|
|||
if (location.getStart() != location.getStop()) {
|
||||
throw new IllegalArgumentException("The start and stop locations must be the same");
|
||||
}
|
||||
this.referenceBase = refBase;
|
||||
this.referenceBases = refBases;
|
||||
} else {
|
||||
if (!contig.equals(this.contig))
|
||||
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
|
||||
if (location.getStart() != this.position)
|
||||
throw new IllegalArgumentException("The position has to be the same at a single locus");
|
||||
if (refBase != this.referenceBase)
|
||||
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
|
||||
if (refBases != this.referenceBases)
|
||||
throw new IllegalArgumentException("The reference has to be the same at a single locus");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -52,8 +52,8 @@ class VCFParameters {
|
|||
}
|
||||
|
||||
/** @return get the reference base */
|
||||
public char getReferenceBase() {
|
||||
return referenceBase;
|
||||
public String getReferenceBases() {
|
||||
return referenceBases;
|
||||
}
|
||||
|
||||
public void addGenotypeRecord(VCFGenotypeRecord record) {
|
||||
|
|
@ -72,7 +72,7 @@ class VCFParameters {
|
|||
|
||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||
if ( !alternateBases.contains(base) &&
|
||||
!base.toString().equals(String.valueOf(getReferenceBase()).toUpperCase()) &&
|
||||
!base.toString().equals(String.valueOf(getReferenceBases()).toUpperCase()) &&
|
||||
!base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) {
|
||||
alternateBases.add(base);
|
||||
alleleCounts.add(0);
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
|
||||
// the reference base
|
||||
private char mReferenceBase;
|
||||
private String mReferenceBases;
|
||||
// our location
|
||||
private GenomeLoc mLoc;
|
||||
// our id
|
||||
|
|
@ -55,7 +55,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
// our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
|
||||
private final Map<String, String> mInfoFields = new TreeMap<String, String>();
|
||||
|
||||
private final String mGenotypeFormatString;
|
||||
private String mGenotypeFormatString;
|
||||
|
||||
// our genotype sample fields
|
||||
private final List<Genotype> mGenotypeRecords = new ArrayList<Genotype>();
|
||||
|
|
@ -63,12 +63,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
/**
|
||||
* given a reference base, a location, and the format string, create a VCF record.
|
||||
*
|
||||
* @param referenceBase the reference base to use
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param location our genomic location
|
||||
* @param genotypeFormatString the format string
|
||||
*/
|
||||
public VCFRecord(char referenceBase, GenomeLoc location, String genotypeFormatString) {
|
||||
setReferenceBase(referenceBase);
|
||||
public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) {
|
||||
setReferenceBase(referenceBases);
|
||||
setLocation(location);
|
||||
mGenotypeFormatString = genotypeFormatString;
|
||||
}
|
||||
|
|
@ -99,7 +99,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
/**
|
||||
* create a VCF record
|
||||
*
|
||||
* @param referenceBase the reference base to use
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param contig the contig this variant is on
|
||||
* @param position our position
|
||||
* @param ID our ID string
|
||||
|
|
@ -110,7 +110,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
* @param genotypeFormatString the format string
|
||||
* @param genotypeObjects the genotype objects
|
||||
*/
|
||||
public VCFRecord(char referenceBase,
|
||||
public VCFRecord(String referenceBases,
|
||||
String contig,
|
||||
long position,
|
||||
String ID,
|
||||
|
|
@ -120,7 +120,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
Map<String, String> infoFields,
|
||||
String genotypeFormatString,
|
||||
List<VCFGenotypeRecord> genotypeObjects) {
|
||||
setReferenceBase(referenceBase);
|
||||
setReferenceBase(referenceBases);
|
||||
setLocation(contig, position);
|
||||
this.mID = ID;
|
||||
for (VCFGenotypeEncoding alt : altBases)
|
||||
|
|
@ -155,7 +155,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
case REF:
|
||||
if (columnValues.get(val).length() != 1)
|
||||
throw new IllegalArgumentException("Reference base should be a single character");
|
||||
setReferenceBase(columnValues.get(val).charAt(0));
|
||||
setReferenceBase(columnValues.get(val));
|
||||
break;
|
||||
case ALT:
|
||||
String values[] = columnValues.get(val).split(",");
|
||||
|
|
@ -215,16 +215,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
* @return either A, T, C, G, or N
|
||||
*/
|
||||
public String getReference() {
|
||||
return Character.toString(mReferenceBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base
|
||||
*
|
||||
* @return either A, T, C, G, or N
|
||||
*/
|
||||
public char getReferenceBase() {
|
||||
return mReferenceBase;
|
||||
return mReferenceBases;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -333,7 +324,6 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
}
|
||||
|
||||
public boolean isInHapmap() {
|
||||
boolean inHapmap;
|
||||
if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) {
|
||||
return true;
|
||||
} else {
|
||||
|
|
@ -350,7 +340,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
public char getReferenceForSNP() {
|
||||
if ( !isSNP() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
return getReferenceBase();
|
||||
return getReference().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -460,11 +450,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
return mGenotypeFormatString;
|
||||
}// the formatting string for our genotype records
|
||||
|
||||
public void setReferenceBase(char referenceBase) {
|
||||
referenceBase = Character.toUpperCase(referenceBase);
|
||||
if (referenceBase != 'A' && referenceBase != 'C' && referenceBase != 'T' && referenceBase != 'G' && referenceBase != 'N')
|
||||
throw new IllegalArgumentException("Reference base must be one of A,C,G,T,N, we saw: " + referenceBase);
|
||||
mReferenceBase = referenceBase;
|
||||
public void setGenotypeFormatString(String newFormatString) {
|
||||
mGenotypeFormatString = newFormatString;
|
||||
}
|
||||
|
||||
public void setReferenceBase(String reference) {
|
||||
mReferenceBases = reference.toUpperCase();
|
||||
}
|
||||
|
||||
public void setLocation(String chrom, long position) {
|
||||
|
|
@ -578,7 +569,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getID());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getReferenceBase());
|
||||
builder.append(getReference());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
List<VCFGenotypeEncoding> alts = getAlternateAlleles();
|
||||
if ( alts.size() > 0 ) {
|
||||
|
|
@ -667,7 +658,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
*/
|
||||
public boolean equals(VCFRecord other) {
|
||||
if (!this.mAlts.equals(other.mAlts)) return false;
|
||||
if (this.mReferenceBase != other.mReferenceBase) return false;
|
||||
if (!this.mReferenceBases.equals(other.mReferenceBases)) return false;
|
||||
if (!this.mLoc.equals(other.mLoc)) return false;
|
||||
if (!this.mID.equals(other.mID)) return false;
|
||||
if (this.mQual != other.mQual) return false;
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ public class VCFUtils {
|
|||
if ( freqsSeen > 0 )
|
||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
||||
|
||||
return new VCFRecord(params.getReferenceBase(),
|
||||
return new VCFRecord(params.getReferenceBases(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
(id != null ? id : "."),
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import java.io.File;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class AlignerIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
//@Test
|
||||
public void testBasicAlignment() {
|
||||
String md5 = "c6d95d8ae707e78fefdaa7375f130995";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@ public class ConcordanceTruthTableTest extends BaseTest {
|
|||
|
||||
ConcordanceTruthTable ctt = new ConcordanceTruthTable(3);
|
||||
// this will test the count of non-reference alleles at a T/G polymorphic site
|
||||
Genotype ref1 = new BasicGenotype(null,"GG",'G',30);
|
||||
Genotype ref2 = new BasicGenotype(null,"GG",'G',30);
|
||||
Genotype ref3 = new BasicGenotype(null,"GG",'G',30);
|
||||
Genotype het1 = new BasicGenotype(null,"GT",'G',32);
|
||||
Genotype het2 = new BasicGenotype(null,"GT",'G',28);
|
||||
Genotype hom1 = new BasicGenotype(null,"TT",'G',40);
|
||||
Genotype hom2 = new BasicGenotype(null,"TT",'G',27);
|
||||
Genotype ref1 = new BasicGenotype(null,"GG","G",30);
|
||||
Genotype ref2 = new BasicGenotype(null,"GG","G",30);
|
||||
Genotype ref3 = new BasicGenotype(null,"GG","G",30);
|
||||
Genotype het1 = new BasicGenotype(null,"GT","G",32);
|
||||
Genotype het2 = new BasicGenotype(null,"GT","G",28);
|
||||
Genotype hom1 = new BasicGenotype(null,"TT","G",40);
|
||||
Genotype hom2 = new BasicGenotype(null,"TT","G",27);
|
||||
|
||||
List<Pair<Genotype,Genotype>> oneHom = new ArrayList<Pair<Genotype,Genotype>>(4);
|
||||
oneHom.add(new Pair<Genotype,Genotype>(ref1,null));
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ import java.util.List;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SequenomToVCFIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
//@Test
|
||||
public void testSequenomToVCFWithoutSmartHardy() {
|
||||
String testMD5 = "a16ce402ce4492c8c0552073c0a8bdf5";
|
||||
String testPedFile = "/humgen/gsa-hpprojects/GATK/data/Validation_Data/Sequenom_Test_File.txt";
|
||||
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -L 1:1000000-2000000 "+
|
||||
"-T PlinkToVCF -b36contig -ns 10 -sPed "+testPedFile+" -vcf %s";
|
||||
"-T PlinkToVCF -b36contig -ns 10 -B input,Plink,"+testPedFile+" -vcf %s";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs,1, Arrays.asList(testMD5));
|
||||
List<File> result = executeTest("TestSequenomToVCFNoSmartHardy",spec).getFirst();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,27 +36,27 @@ public class BasicGenotypeTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testBasicGenotypeIsHom() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
|
||||
Assert.assertTrue(gt.isHom());
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
|
||||
Assert.assertTrue(!gt2.isHom());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicGenotypeIsHet() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
|
||||
Assert.assertTrue(!gt.isHet());
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
|
||||
Assert.assertTrue(gt2.isHet());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicGenotypeIsVariant() {
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA",'A',0);
|
||||
BasicGenotype gt = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"AA","A",0);
|
||||
Assert.assertTrue(!gt.isVariant('A'));
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA",'A',0);
|
||||
BasicGenotype gt2 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"GA","A",0);
|
||||
Assert.assertTrue(gt2.isVariant('A'));
|
||||
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT",'A',0);
|
||||
BasicGenotype gt3 = new BasicGenotype(GenomeLocParser.createGenomeLoc("chr1",1,1),"TT","A",0);
|
||||
Assert.assertTrue(gt3.isVariant('A'));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
|
|||
* @param negLog10PError the confidence score
|
||||
*/
|
||||
public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) {
|
||||
super(ref, location);
|
||||
super(Character.toString(ref), location);
|
||||
setLikelihoods(likelihoods);
|
||||
setGenotype(genotype);
|
||||
setNegLog10PError(negLog10PError);
|
||||
|
|
|
|||
|
|
@ -290,8 +290,8 @@ public class VCFReaderTest extends BaseTest {
|
|||
grecords = rec.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( !grec.isEmptyGenotype() ) {
|
||||
Assert.assertTrue(grec.isVariant(rec.getReferenceBase()));
|
||||
Assert.assertEquals(rec, grec.toVariation(rec.getReferenceBase()));
|
||||
Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0)));
|
||||
Assert.assertEquals(rec, grec.toVariation(rec.getReference().charAt(0)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -300,7 +300,7 @@ public class VCFReaderTest extends BaseTest {
|
|||
rec = reader.next();
|
||||
grecords = rec.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( !grec.isVariant(rec.getReferenceBase()) ) {
|
||||
if ( !grec.isVariant(rec.getReference().charAt(0)) ) {
|
||||
Assert.assertTrue(grec.isHom());
|
||||
Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
|
||||
Assert.assertEquals(-1, grec.getReadCount());
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class VCFRecordTest extends BaseTest {
|
|||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
return new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
|
||||
return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -72,7 +72,7 @@ public class VCFRecordTest extends BaseTest {
|
|||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
VCFRecord rec = new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
|
||||
VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
|
||||
Assert.assertEquals(2, rec.getAlternateAlleles().size());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class VCFWriterTest extends BaseTest {
|
|||
rec.setField("bb", "0");
|
||||
gt.add(rec);
|
||||
}
|
||||
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
|
||||
return new VCFRecord("A","chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue