GLF output is now well validated, and some changes for new Genotypes interface code
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1355 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
afccbc44ec
commit
2f2c8576a5
|
|
@ -154,7 +154,7 @@ public class BasicGenotype implements Genotype {
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Variant toVariant() {
|
||||
public org.broadinstitute.sting.utils.genotype.variant.Variant toVariant() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.variant.Variant;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -13,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
public interface Genotype {
|
||||
/**
|
||||
* get the confidence score
|
||||
*
|
||||
* @return get the confidence score that we're based on
|
||||
*/
|
||||
public ConfidenceScore getConfidenceScore();
|
||||
|
|
@ -26,6 +28,7 @@ public interface Genotype {
|
|||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy();
|
||||
|
|
@ -46,25 +49,30 @@ public interface Genotype {
|
|||
|
||||
/**
|
||||
* get the genotype's location
|
||||
*
|
||||
* @return a GenomeLoc representing the location
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's an indel / deletion
|
||||
*
|
||||
* @return true if this genotype is a point genotype (SNP)
|
||||
*/
|
||||
public boolean isPointGenotype();
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref);
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Variant toVariant();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface GenotypeOutputFormat
|
||||
* <p/>
|
||||
* This interface is in addition to the GenotypeCall interface,
|
||||
* but adds the required functionality that output (GenotypeWriter) interfaces
|
||||
* need. It's fair game to return 0 or any value for these fields, but the methods
|
||||
* are all used by various output writers
|
||||
*/
|
||||
public interface GenotypeOutput extends GenotypeCall {
|
||||
|
||||
/**
|
||||
* return the likelihoods as a double array, in lexicographic order
|
||||
*
|
||||
* @return the likelihoods
|
||||
*/
|
||||
public double[] getLikelihoods();
|
||||
|
||||
/**
|
||||
* get the depth of the reads at this location
|
||||
* @return the depth
|
||||
*/
|
||||
public int getReadDepth();
|
||||
|
||||
/**
|
||||
* get the rms mapping qualities
|
||||
* @return
|
||||
*/
|
||||
public double getRmsMapping();
|
||||
|
||||
/**
|
||||
* get the score comparing the best genotype to the next best genotype
|
||||
* @return
|
||||
*/
|
||||
public double getBestNext();
|
||||
|
||||
/**
|
||||
* get the score of the best genotype compared to the reference
|
||||
* @return
|
||||
*/
|
||||
public double getBestRef();
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ public interface GenotypeWriter {
|
|||
* Add a genotype, given a genotype locus
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(org.broadinstitute.sting.utils.genotype.calls.GenotypeCall locus);
|
||||
public void addGenotypeCall(GenotypeOutput locus);
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -42,19 +41,23 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
* @param locus the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
/*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
|
||||
locus.getLocation(),
|
||||
public void addGenotypeCall(GenotypeOutput locus) {
|
||||
/**
|
||||
* This output is not correct, but I don't think we even use this format anymore. If we do, someone
|
||||
* should change this code
|
||||
*/
|
||||
outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
|
||||
locus.getLocation().toString(),
|
||||
"NOT OUTPUTED",
|
||||
locus.getReferencebase(),
|
||||
locus.getGenotypes().get(1).getBases().,
|
||||
genotype(),
|
||||
qhat,
|
||||
qstar,
|
||||
lodVsRef,
|
||||
lodVsNextBest,
|
||||
depth,
|
||||
bases); */
|
||||
locus.getBases(),
|
||||
locus.getBases(),
|
||||
-1,
|
||||
-1,
|
||||
locus.getBestRef(),
|
||||
locus.getBestNext(),
|
||||
locus.getReadDepth(),
|
||||
locus.getBases()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface Variant
|
||||
* <p/>
|
||||
* This class represents a variant
|
||||
*/
|
||||
public interface Variant {
|
||||
}
|
||||
|
|
@ -6,10 +6,11 @@ import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
|
||||
import org.broadinstitute.sting.utils.genotype.LexigraphicalComparator;
|
||||
import org.broadinstitute.sting.utils.genotype.Variant;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.variant.Variant;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
|
@ -25,7 +26,7 @@ import java.util.TreeMap;
|
|||
* The single sample genotypers implementation of the genotype call, which contains
|
||||
* extra information for the various genotype outputs
|
||||
*/
|
||||
public class SSGGenotypeCall implements GenotypeCall {
|
||||
public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
|
||||
// our stored genotype locus
|
||||
private final String mRefBase;
|
||||
private final int mPloidy;
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli;
|
|||
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
|
@ -100,8 +102,10 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param locus the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
// TODO: add code here
|
||||
public void addGenotypeCall(GenotypeOutput locus) {
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
public void addGenotypeCall(GenotypeOutput locus) {
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
|
||||
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ public abstract class GLFRecord {
|
|||
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUInt(((Long) offset).intValue());
|
||||
int write = ((new Long(readDepth).intValue() & 0xffffff) | this.minimumLikelihood & 0xff << 24);
|
||||
long write = (long) ((long)(readDepth & 0xffffff) | (long)(this.minimumLikelihood & 0xff) << 24);
|
||||
out.writeUInt(write);
|
||||
out.writeUByte((short) rmsMapQ);
|
||||
}
|
||||
|
|
@ -250,7 +250,7 @@ public abstract class GLFRecord {
|
|||
for (double d : vals) {
|
||||
if (d < min) min = d;
|
||||
}
|
||||
return (min > 255) ? 255 : (short)min;
|
||||
return GLFRecord.toCappedShort(min);
|
||||
}
|
||||
|
||||
public REF_BASE getRefBase() {
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ import net.sf.samtools.SAMSequenceRecord;
|
|||
import net.sf.samtools.util.BinaryCodec;
|
||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
|
|
@ -60,6 +60,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
// the last position written
|
||||
private int lastPos = 1;
|
||||
|
||||
int getRidOfMe = 0;
|
||||
|
||||
/**
|
||||
* The public constructor for creating a GLF object
|
||||
*
|
||||
|
|
@ -90,6 +92,11 @@ public class GLFWriter implements GenotypeWriter {
|
|||
int readDepth,
|
||||
LikelihoodObject lhValues) {
|
||||
|
||||
getRidOfMe++;
|
||||
if (getRidOfMe == 500) {
|
||||
this.close();
|
||||
} else if (getRidOfMe < 500) {
|
||||
System.err.println(contig.getSequenceName() + ":" + genomicLoc);
|
||||
// check if we've jumped to a new contig
|
||||
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||
|
||||
|
|
@ -100,6 +107,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
lhValues.toDoubleArray());
|
||||
lastPos = genomicLoc;
|
||||
call.write(this.outputBinaryCodec);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -108,11 +116,11 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* @param locus
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
public void addGenotypeCall(GenotypeOutput locus) {
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,63 @@
|
|||
package org.broadinstitute.sting.utils.genotype.variant;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface Variant
|
||||
* <p/>
|
||||
* This class represents a variant
|
||||
*/
|
||||
public interface Variant {
|
||||
// the types of variants we currently allow
|
||||
public enum VARIANT_TYPE {
|
||||
SNP, INDEL, DELETION, REFERENCE
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
public VariantFrequency getFrequency();
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
public VARIANT_TYPE getType();
|
||||
|
||||
/**
|
||||
* are we a SNP? If not we're an Indel/deletion
|
||||
*
|
||||
* @return true if we're a SNP
|
||||
*/
|
||||
public boolean isSNP();
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
public boolean isInsertion();
|
||||
|
||||
/**
|
||||
* are we a deletion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
public boolean isDeletion();
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
public String toBases();
|
||||
|
||||
/**
|
||||
* get the location that this Variant represents
|
||||
* @return a GenomeLoc
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.utils.genotype.variant;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VariantFrequency
|
||||
*
|
||||
* a class that represents the variant frequency, and could serve as a base for any other
|
||||
* variant frequency information (i.e. is it pop gen, from chip, etc).
|
||||
*/
|
||||
public class VariantFrequency {
|
||||
|
||||
private double mFrequency;
|
||||
|
||||
/**
|
||||
* create a variant frequency
|
||||
* @param frequency
|
||||
*/
|
||||
public VariantFrequency(double frequency) {
|
||||
this.mFrequency = frequency;
|
||||
}
|
||||
|
||||
public double getFrequency() {
|
||||
return mFrequency;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue