GLF output is now well validated, and some changes for new Genotypes interface code

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1355 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-31 15:21:28 +00:00
parent afccbc44ec
commit 2f2c8576a5
13 changed files with 189 additions and 38 deletions

View File

@ -154,7 +154,7 @@ public class BasicGenotype implements Genotype {
* @return
*/
@Override
public Variant toVariant() {
public org.broadinstitute.sting.utils.genotype.variant.Variant toVariant() {
return null;
}

View File

@ -1,7 +1,8 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.variant.Variant;
/**
* @author aaron
@ -13,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
public interface Genotype {
/**
* get the confidence score
*
* @return get the confidence score that we're based on
*/
public ConfidenceScore getConfidenceScore();
@ -26,6 +28,7 @@ public interface Genotype {
/**
* get the ploidy
*
* @return the ploidy value
*/
public int getPloidy();
@ -46,25 +49,30 @@ public interface Genotype {
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
public GenomeLoc getLocation();
/**
* returns true if the genotype is a point genotype, false if it's an indel / deletion
*
* @return true if this is a SNP
*/
public boolean isPointGenotype();
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
public boolean isVariant(char ref);
/**
* return this genotype as a variant
*
* @return
*/
public Variant toVariant();

View File

@ -0,0 +1,47 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
/**
 * @author aaron
 * <p/>
 * Interface GenotypeOutput
 * <p/>
 * This interface is in addition to the GenotypeCall interface; it adds the
 * functionality that the output (GenotypeWriter) interfaces require. An
 * implementation may return 0 or any other value for these fields, but every
 * method declared here is used by various output writers.
 */
public interface GenotypeOutput extends GenotypeCall {

    /**
     * Return the likelihoods as a double array, in lexicographic order.
     *
     * @return the likelihoods
     */
    double[] getLikelihoods();

    /**
     * Get the depth of the reads at this location.
     *
     * @return the depth
     */
    int getReadDepth();

    /**
     * Get the RMS of the mapping qualities.
     *
     * @return the RMS mapping quality value
     */
    double getRmsMapping();

    /**
     * Get the score of the best genotype relative to the next best genotype.
     *
     * @return the best-versus-next-best value
     */
    double getBestNext();

    /**
     * Get the score of the best genotype compared to the reference.
     *
     * @return the best-versus-reference value
     */
    double getBestRef();
}

View File

@ -38,7 +38,7 @@ public interface GenotypeWriter {
* Add a genotype, given a genotype locus
* @param locus the locus to add
*/
public void addGenotypeCall(org.broadinstitute.sting.utils.genotype.calls.GenotypeCall locus);
public void addGenotypeCall(GenotypeOutput locus);
/**
* add a no call to the genotype file, if supported.

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import java.io.File;
import java.io.FileNotFoundException;
@ -42,19 +41,23 @@ public class TabularLFWriter implements GenotypeWriter {
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(GenotypeCall locus) {
/*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
locus.getLocation(),
public void addGenotypeCall(GenotypeOutput locus) {
/**
* This output is not correct, but I don't think we even use this format anymore. If we do, someone
* should change this code
*/
outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
locus.getLocation().toString(),
"NOT OUTPUTED",
locus.getReferencebase(),
locus.getGenotypes().get(1).getBases().,
genotype(),
qhat,
qstar,
lodVsRef,
lodVsNextBest,
depth,
bases); */
locus.getBases(),
locus.getBases(),
-1,
-1,
locus.getBestRef(),
locus.getBestNext(),
locus.getReadDepth(),
locus.getBases()));
}
/**

View File

@ -1,11 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Interface Variant
* <p/>
* This interface represents a variant. It declares no members, so it
* only serves as a common type for variant implementations.
*/
public interface Variant {
}

View File

@ -6,10 +6,11 @@ import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.LexigraphicalComparator;
import org.broadinstitute.sting.utils.genotype.Variant;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.variant.Variant;
import java.util.ArrayList;
import java.util.Collections;
@ -25,7 +26,7 @@ import java.util.TreeMap;
* The single sample genotypers implementation of the genotype call, which contains
* extra information for the various genotype outputs
*/
public class SSGGenotypeCall implements GenotypeCall {
public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
// our stored genotype locus
private final String mRefBase;
private final int mPloidy;

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
@ -100,8 +102,10 @@ public class GeliAdapter implements GenotypeWriter {
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(GenotypeCall locus) {
// TODO: add code here
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
}
/**

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
@ -40,7 +40,7 @@ public class GeliTextWriter implements GenotypeWriter {
*
* @param locus the locus to add
*/
public void addGenotypeCall(GenotypeCall locus) {
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",

View File

@ -204,7 +204,7 @@ public abstract class GLFRecord {
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUInt(((Long) offset).intValue());
int write = ((new Long(readDepth).intValue() & 0xffffff) | this.minimumLikelihood & 0xff << 24);
long write = (long) ((long)(readDepth & 0xffffff) | (long)(this.minimumLikelihood & 0xff) << 24);
out.writeUInt(write);
out.writeUByte((short) rmsMapQ);
}
@ -250,7 +250,7 @@ public abstract class GLFRecord {
for (double d : vals) {
if (d < min) min = d;
}
return (min > 255) ? 255 : (short)min;
return GLFRecord.toCappedShort(min);
}
public REF_BASE getRefBase() {

View File

@ -4,10 +4,10 @@ import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import java.io.DataOutputStream;
@ -60,6 +60,8 @@ public class GLFWriter implements GenotypeWriter {
// the last position written
private int lastPos = 1;
int getRidOfMe = 0;
/**
* The public constructor for creating a GLF object
*
@ -90,6 +92,11 @@ public class GLFWriter implements GenotypeWriter {
int readDepth,
LikelihoodObject lhValues) {
getRidOfMe++;
if (getRidOfMe == 500) {
this.close();
} else if (getRidOfMe < 500) {
System.err.println(contig.getSequenceName() + ":" + genomicLoc);
// check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
@ -100,6 +107,7 @@ public class GLFWriter implements GenotypeWriter {
lhValues.toDoubleArray());
lastPos = genomicLoc;
call.write(this.outputBinaryCodec);
}
}
/**
@ -108,11 +116,11 @@ public class GLFWriter implements GenotypeWriter {
* @param locus
*/
@Override
public void addGenotypeCall(GenotypeCall locus) {
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
}
/**

View File

@ -0,0 +1,63 @@
package org.broadinstitute.sting.utils.genotype.variant;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
 * @author aaron
 * <p/>
 * Interface Variant
 * <p/>
 * This interface represents a variant.
 */
public interface Variant {

    /** The types of variants we currently allow. */
    enum VARIANT_TYPE {
        SNP, INDEL, DELETION, REFERENCE
    }

    /**
     * Get the frequency of this variant.
     *
     * @return a VariantFrequency with the stored frequency
     */
    VariantFrequency getFrequency();

    /** @return the VARIANT_TYPE of the current variant */
    VARIANT_TYPE getType();

    /**
     * Are we a SNP? If not, we're an indel / deletion.
     *
     * @return true if we're a SNP
     */
    boolean isSNP();

    /**
     * Are we an insertion?
     *
     * @return true if we are, false otherwise
     */
    boolean isInsertion();

    /**
     * Are we a deletion?
     *
     * @return true if we are, false otherwise
     */
    boolean isDeletion();

    /**
     * Get the base representation of this Variant.
     *
     * @return a string of bases (described by the author as "of ploidy";
     *         presumably one base per chromosome copy - TODO confirm)
     */
    String toBases();

    /**
     * Get the location that this Variant represents.
     *
     * @return a GenomeLoc
     */
    GenomeLoc getLocation();
}

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.genotype.variant;
/**
 * @author aaron
 * <p/>
 * Class VariantFrequency
 * <p/>
 * A class that represents the variant frequency, and could serve as a base for any other
 * variant frequency information (i.e. is it pop gen, from chip, etc).
 * The frequency is fixed at construction time and never changes afterwards.
 */
public class VariantFrequency {
    // the stored frequency; final because it is only ever assigned in the constructor
    private final double mFrequency;

    /**
     * Create a variant frequency.
     *
     * @param frequency the frequency value to store; it is kept exactly as given,
     *                  with no range checking
     */
    public VariantFrequency(double frequency) {
        this.mFrequency = frequency;
    }

    /**
     * Get the stored frequency.
     *
     * @return the value that was passed to the constructor
     */
    public double getFrequency() {
        return mFrequency;
    }
}