2009-07-09 04:25:56 +08:00
|
|
|
package org.broadinstitute.sting.gatk.refdata;
|
|
|
|
|
|
2009-07-15 03:45:41 +08:00
|
|
|
import java.util.*;
|
2009-07-09 04:25:56 +08:00
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.io.File;
|
|
|
|
|
|
|
|
|
|
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
|
|
|
|
|
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
2009-07-10 03:18:13 +08:00
|
|
|
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
2009-07-09 04:25:56 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.util.CloseableIterator;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* This class wraps Picard Geli CHiP data and presents it as a ROD.
|
|
|
|
|
*/
|
|
|
|
|
|
2009-07-15 03:45:41 +08:00
|
|
|
public class rodGELI extends BasicReferenceOrderedDatum implements Genotype {
|
2009-07-09 04:25:56 +08:00
|
|
|
// ----------------------------------------------------------------------
|
|
|
|
|
//
|
|
|
|
|
// Constructors
|
|
|
|
|
//
|
|
|
|
|
// ----------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
private GenotypeLikelihoods gh = null;
|
|
|
|
|
|
|
|
|
|
public rodGELI(final String name, GenotypeLikelihoods gh) {
|
|
|
|
|
super(name);
|
|
|
|
|
this.gh = gh;
|
2009-07-10 03:18:13 +08:00
|
|
|
}
|
2009-07-09 04:25:56 +08:00
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public GenomeLoc getLocation() {
|
|
|
|
|
return GenomeLocParser.createGenomeLoc(gh.getSequenceIndex(), gh.getPosition());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
|
|
|
|
|
* so this method does nothing at all (always returns false).
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean parseLine(Object header, String[] parts) throws IOException {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public String toString() {
|
2009-07-10 03:18:13 +08:00
|
|
|
final StringBuilder builder = new StringBuilder();
|
|
|
|
|
builder.append(gh.getSequenceName() + "\t");
|
|
|
|
|
builder.append(gh.getPosition() + "\t");
|
|
|
|
|
builder.append(Character.toString((char)(gh.getReferenceBase() & 0xff)) + "\t");
|
|
|
|
|
builder.append(gh.getNumReads() + "\t");
|
|
|
|
|
builder.append(gh.getMaxMappingQuality() + "\t");
|
|
|
|
|
builder.append(gh.getBestGenotype().name() + "\t");
|
|
|
|
|
builder.append(gh.getBestToReferenceLod() + "\t");
|
|
|
|
|
builder.append(gh.getBestToSecondBestLod() + "\t");
|
|
|
|
|
builder.append("\t"); // no dbSNP info in GenotypeLikelihoods class
|
|
|
|
|
for (final DiploidGenotype genotype : DiploidGenotype.values())
|
|
|
|
|
builder.append(gh.getLikelihood(genotype) + "\t");
|
|
|
|
|
builder.append("\n");
|
|
|
|
|
return builder.toString();
|
2009-07-09 04:25:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static class rodGELIIterator implements Iterator<rodGELI> {
|
|
|
|
|
|
|
|
|
|
private String rodName = null;
|
|
|
|
|
private GeliFileReader parser = null;
|
|
|
|
|
private CloseableIterator<GenotypeLikelihoods> iterator = null;
|
|
|
|
|
|
|
|
|
|
rodGELIIterator(String name, File f) {
|
|
|
|
|
rodName = name;
|
|
|
|
|
parser = new GeliFileReader(f);
|
|
|
|
|
iterator = parser.iterator();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean hasNext() {
|
|
|
|
|
return iterator.hasNext();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public rodGELI next() {
|
|
|
|
|
return new rodGELI(rodName, iterator.next());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void remove() {
|
|
|
|
|
throw new UnsupportedOperationException("'remove' operation is not supported for GELIs");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static Iterator<rodGELI> createIterator(String name, File file) {
|
|
|
|
|
return new rodGELI.rodGELIIterator(name,file);
|
|
|
|
|
}
|
2009-07-10 03:18:13 +08:00
|
|
|
|
2009-07-15 03:45:41 +08:00
|
|
|
public List<String> getFWDAlleles() {
|
|
|
|
|
return new ArrayList<String>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String getFWDRefBases() {
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public char getRef() {
|
|
|
|
|
return (char)(gh.getReferenceBase() & 0xff);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean isPointGenotype() { return true; }
|
|
|
|
|
public boolean isIndelGenotype() { return false; }
|
|
|
|
|
public boolean isSNP() { return true; }
|
|
|
|
|
public boolean isReference() { return gh.isHomozygousReference(); }
|
|
|
|
|
public boolean isInsertion() { return false; }
|
|
|
|
|
public boolean isDeletion() { return false; }
|
|
|
|
|
public boolean isIndel() { return false; }
|
|
|
|
|
public boolean isBiallelic() { return false; }
|
|
|
|
|
public boolean isHom() { return gh.isHomozyous(); }
|
|
|
|
|
public boolean isHet() { return !isHom(); }
|
|
|
|
|
|
|
|
|
|
public double getVariantConfidence() {
|
|
|
|
|
return gh.getBestToReferenceLod();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public double getConsensusConfidence() {
|
|
|
|
|
return gh.getBestToSecondBestLod();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2009-07-10 03:18:13 +08:00
|
|
|
public static void main(String argv[]) {
|
|
|
|
|
String testFile = "NA12878.geli";
|
|
|
|
|
|
|
|
|
|
Iterator<rodGELI> it = createIterator("test-geli", new File(testFile));
|
|
|
|
|
|
|
|
|
|
net.sf.picard.reference.ReferenceSequenceFileWalker reference = new net.sf.picard.reference.ReferenceSequenceFileWalker(new File( "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
|
|
|
|
|
|
|
|
|
if ( reference.getSequenceDictionary() == null ) {
|
|
|
|
|
System.out.println("No reference sequence dictionary found. Abort.");
|
|
|
|
|
System.exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
|
|
|
|
|
|
|
|
|
int counter = 0;
|
|
|
|
|
|
|
|
|
|
while ( it.hasNext() && counter < 500 ) {
|
|
|
|
|
rodGELI rg = it.next();
|
|
|
|
|
System.out.println(rg.toString());
|
|
|
|
|
counter++;
|
|
|
|
|
}
|
|
|
|
|
}
|
2009-07-09 04:25:56 +08:00
|
|
|
}
|