Removal of most of the old GATK ROD system; also a fix for -Dsingle so we can again run a single unit or integration test (single tests in Tribble can now be run with the -DsingleTest option). More to come.
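
For example (the ant target name below is an assumption from memory, and the test class names are only illustrations):

    ant test -Dsingle=PileupWalkerIntegrationTest
    ant test -DsingleTest=SomeTribbleTest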

*** Three integration tests had to change: ***

RecalibrationWalkersIntegrationTest:
One of the tests was using the interval track as the SNP track and wasn't supplying a dbSNP track (for CountCovariates)
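
Presumably the fixed test now supplies dbSNP explicitly, along these lines (flag names recalled from GATK of this era; treat the whole invocation as a sketch, not the actual test command):

    java -jar GenomeAnalysisTK.jar -T CountCovariates \
        -R ref.fasta -I input.bam \
        -D dbsnp.rod -recalFile recal.csv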

SequenomValidationConverterIntegrationTest:
relies on the Plink ROD, which we've removed.

PileupWalkerIntegrationTest: 
we no longer have implicit interval tracks, so there is no ROD name over the specified region. The result is otherwise the same.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4292 348d0f76-0448-11de-a6fe-93d51630548a
aaron 2010-09-15 22:54:49 +00:00
parent c604ed9440
commit 782e0018e4
46 changed files with 664 additions and 3830 deletions

View File

@@ -30,9 +30,11 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.SampleFileParser;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -50,7 +52,6 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.*;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
@@ -381,12 +382,8 @@ public class GenomeAnalysisEngine {
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
if (argCollection.DBSNPFile != null) bindConvenienceRods(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, "dbsnp", argCollection.DBSNPFile);
// TODO: The ROD iterator currently does not understand multiple interval files. Fix this by cleaning up the ROD system.
if (argCollection.intervals != null && argCollection.intervals.size() == 1) {
bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", ""));
}
RMDTrackManager manager = new RMDTrackManager();
RMDTrackBuilder manager = new RMDTrackBuilder();
List<RMDTrack> tracks = manager.getReferenceMetaDataSources(this,argCollection.RODBindings);
validateSuppliedReferenceOrderedDataAgainstWalker(my_walker, tracks);
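
A minimal sketch of the new track-construction flow, using only names visible in this hunk (the engine and argument-collection objects are assumed context):

    RMDTrackBuilder builder = new RMDTrackBuilder();
    List<RMDTrack> tracks = builder.getReferenceMetaDataSources(engine, argCollection.RODBindings);
    for (RMDTrack track : tracks)
        System.out.println(track.getName() + " -> " + track.getRecordType().getSimpleName());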

View File

@@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@@ -51,7 +50,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
public ReferenceOrderedDataSource( Walker walker, RMDTrack rod) {
this.rod = rod;
if (rod.supportsQuery())
iteratorPool = new ReferenceOrderedQueryDataPool(new TribbleRMDTrackBuilder(), (TribbleTrack)rod);
iteratorPool = new ReferenceOrderedQueryDataPool(new RMDTrackBuilder(),rod);
else
iteratorPool = new ReferenceOrderedDataPool( walker, rod );
}
@@ -112,7 +111,7 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
private final RMDTrack rod;
boolean flashbackData = false;
public ReferenceOrderedDataPool( Walker walker, RMDTrack rod ) {
if (walker instanceof ReadWalker) flashbackData = true; // && (rod.getType() != IntervalRod.class)
if (walker instanceof ReadWalker) flashbackData = true;
this.rod = rod;
}
@@ -184,9 +183,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<FeatureSource, Location
private final RMDTrack rod;
// our tribble track builder
private final TribbleRMDTrackBuilder builder;
private final RMDTrackBuilder builder;
public ReferenceOrderedQueryDataPool( TribbleRMDTrackBuilder builder, TribbleTrack rod ) {
public ReferenceOrderedQueryDataPool( RMDTrackBuilder builder, RMDTrack rod ) {
this.rod = rod;
this.builder = builder;
// a little bit of a hack, but it saves us from re-reading the index from the file

View File

@@ -1,46 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* Created by IntelliJ IDEA.
* User: mdepristo
* Date: Feb 27, 2009
* Time: 10:49:47 AM
* To change this template use File | Settings | File Templates.
*/
public abstract class BasicReferenceOrderedDatum implements ReferenceOrderedDatum {
protected String name;
public BasicReferenceOrderedDatum(String name) {
this.name = name;
}
public String getName() { return this.name; }
public abstract boolean parseLine(final Object header, final String[] parts) throws IOException;
public abstract String toString();
public String toSimpleString() { return toString(); }
public String repl() { return this.toString(); }
public String delimiterRegex() {
return "\t";
}
public abstract GenomeLoc getLocation();
public int compareTo( ReferenceOrderedDatum that ) {
//System.out.printf("Comparing %s to %s => %d%n", getLocation(), that.getLocation(), getLocation().compareTo(that.getLocation()));
return getLocation().compareTo(that.getLocation());
}
public Object initialize(final File source) throws FileNotFoundException {
//System.out.printf("Initialize called with %s%n", source);
return null;
}
}

View File

@@ -1,40 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.io.IOException;
public class IntervalRod extends BasicReferenceOrderedDatum {
private GenomeLoc loc = null;
public IntervalRod(String name) {
super(name);
}
public IntervalRod(String name, GenomeLoc loc) {
super(name);
this.loc = loc;
}
public GenomeLoc getLocation() { return loc; }
/** Required by ReferenceOrderedDatum interface; this method does nothing (always returns false),
* since this rod provides its own iterator for reading underlying data files.
*/
public boolean parseLine(Object header, String[] parts) {
return false; // this rod has its own iterator
}
public String repl() {
throw new ReviewedStingException("repl() is not implemented yet");
}
public String toSimpleString() { return loc.toString(); }
public String toString() { return toSimpleString(); }
public static IntervalRodIterator createIterator(String trackName, File f) throws IOException {
return IntervalRodIterator.IntervalRodIteratorFromLocsFile(trackName, f);
}
}

View File

@@ -1,56 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import java.io.File;
import java.util.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
public class IntervalRodIterator implements Iterator<IntervalRod> {
//private List<GenomeLoc> locations = null;
private Iterator<GenomeLoc> iter;
private String trackName = null;
//public static RODIterator<IntervalRod> IntervalRodIteratorFromLocs(List<GenomeLoc> locs) {
// IntervalRodIterator it = new IntervalRodIterator("interval", locs);
// return new RODIterator<IntervalRod>(it);
//}
public static IntervalRodIterator IntervalRodIteratorFromLocsFile(final String trackName, final File file) {
//System.out.printf("Parsing %s for intervals %s%n", file, trackName);
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Collections.singletonList(file.getPath())),
GenomeAnalysisEngine.instance.getArguments().intervalMerging);
//System.out.printf(" => got %d entries %n", locs.size());
return new IntervalRodIterator(trackName, locs);
}
public IntervalRodIterator(String trackName, GenomeLocSortedSet locs) {
this.trackName = trackName;
//locations = locs;
iter = locs.iterator();
}
@Override
public boolean hasNext() {
return iter.hasNext();
}
/**
* @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements.
*/
@Override
public IntervalRod next() {
if (!this.hasNext()) throw new NoSuchElementException("IntervalRodIterator next called on iterator with no more elements");
IntervalRod r = new IntervalRod(trackName, iter.next());
//System.out.printf("IntervalRod next is %s%n", r);
return r;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}

View File

@@ -1,579 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broad.tribble.util.variantcontext.Allele;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: chartl, ebanks
*
*/
public class PlinkRod extends BasicReferenceOrderedDatum implements Iterator<PlinkRod> {
public static final String SEQUENOM_NO_CALL = "0";
public static final String SEQUENOM_NO_BASE = "-";
private final Set<String> headerEntries = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex",
"Paternal ID","Maternal ID","Phenotype", "FID","IID","PAT","MAT","SEX","PHENOTYPE","#Individual ID"));
private final byte SNP_MAJOR_MODE = 1;
private PlinkVariantInfo currentVariant;
private ListIterator<PlinkVariantInfo> variantIterator;
private PlinkFileType plinkFileType;
private List<String> sampleNames;
private String[] fileHeader;
public enum PlinkFileType {
STANDARD_PED, RAW_PED, BINARY_PED
}
public PlinkRod(String name) {
super(name);
}
public PlinkRod(String name, PlinkVariantInfo record, ListIterator<PlinkVariantInfo> iter, List<String> sampleNames) {
super(name);
currentVariant = record;
variantIterator = iter;
this.sampleNames = sampleNames;
}
@Override
public Object initialize(final File plinkFile) throws FileNotFoundException {
if ( ! plinkFile.exists() ) {
throw new FileNotFoundException("File "+plinkFile.getAbsolutePath()+" does not exist.");
}
ArrayList<PlinkVariantInfo> variants = parsePlinkFile(plinkFile);
if ( variants == null )
throw new IllegalStateException("Error parsing Plink file");
variantIterator = variants.listIterator();
return null;
}
public static PlinkRod createIterator(String name, File file) {
PlinkRod plink = new PlinkRod(name);
try {
plink.initialize(file);
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Unable to find file " + file);
}
return plink;
}
private void assertNotNull() {
if ( currentVariant == null ) {
throw new IllegalStateException("Current variant information is null");
}
}
public boolean hasNext() {
return variantIterator.hasNext();
}
public PlinkRod next() {
if ( !this.hasNext() )
throw new NoSuchElementException("PlinkRod next called on iterator with no more elements");
// get the next record
currentVariant = variantIterator.next();
return new PlinkRod(name, currentVariant, variantIterator, sampleNames);
}
@Override
public boolean parseLine(Object obj, String[] args) {
throw new UnsupportedOperationException("PlinkRod does not support the parseLine method");
}
public void remove() {
throw new UnsupportedOperationException("The remove operation is not supported for a PlinkRod");
}
@Override
public GenomeLoc getLocation() {
assertNotNull();
return currentVariant.getLocation();
}
@Override
public String toString() {
assertNotNull();
return currentVariant.toString();
}
public String getVariantName() {
assertNotNull();
return currentVariant.getName();
}
/* Get the mapping from sample name to genotypes (array of Alleles).
* Important note: none of the Alleles returned here are annotated as reference
* (since the rod doesn't know offhand what the reference allele is).
*
* @return mapping from sample name to genotype
*/
public Map<String, Allele[]> getGenotypes() {
return currentVariant.getGenotypes();
}
public List<String> getSampleNames() {
return sampleNames;
}
public boolean isIndel() {
assertNotNull();
return currentVariant.isIndel();
}
public boolean isInsertion() {
assertNotNull();
return currentVariant.isInsertion();
}
public int getLength() {
assertNotNull();
return currentVariant.getLength();
}
// AM I PARSING A TEXT OR A BINARY FILE ??
private ArrayList<PlinkVariantInfo> parsePlinkFile(File file) {
String[] splitFileName = file.getName().split("\\.");
String extension = splitFileName[splitFileName.length-1];
if ( extension.equals("ped") || extension.equals("raw") ) {
return parseTextFormattedPlinkFile(file);
} else if ( extension.equals("bed") || extension.equals("bim") || extension.equals("fam") ) {
plinkFileType = PlinkFileType.BINARY_PED;
return parseBinaryFormattedPlinkFile(file);
} else {
System.out.println("Warning -- Plink file does not have a standard extension (ped/raw for text, bed/bim/fam for binary) -- assuming ped format");
return parseTextFormattedPlinkFile(file);
}
}
/* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***
* * PARSING STANDARD TEXT PED FILES * **
* *** *** *** *** *** ** *** ** *** ** *** ** *** ** ***/
private ArrayList<PlinkVariantInfo> parseTextFormattedPlinkFile( File file ) {
try {
BufferedReader reader = new BufferedReader( new FileReader ( file ) );
ArrayList<PlinkVariantInfo> seqVars = new ArrayList<PlinkVariantInfo>();
int headerFieldCount = instantiateVariantListFromHeader(seqVars, reader.readLine());
sampleNames = new ArrayList<String>();
String line;
do {
line = reader.readLine();
incorporateInfo(seqVars, line, headerFieldCount);
} while ( line != null );
java.util.Collections.sort(seqVars); // PlinkVariantInfo's compareTo delegates to the GenomeLoc
// comparator, so this sorts the variants in standard reference order
return seqVars;
} catch ( FileNotFoundException e ) {
throw new ReviewedStingException("File "+file.getAbsolutePath()+" could not be found. This was checked earlier. Should never happen.",e);
} catch ( IOException e ) {
throw new ReviewedStingException("Error reading file "+file.getAbsolutePath()+".",e);
}
}
private void incorporateInfo(List<PlinkVariantInfo> vars, String plinkLine, int headerFieldCount) {
if ( plinkLine == null ) {
return;
}
if ( plinkFileType != PlinkFileType.STANDARD_PED )
throw new ReviewedStingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
StringTokenizer st = new StringTokenizer(plinkLine, "\t");
int offset = 0;
String sample = st.nextToken();
while ( ! fileHeader[offset].equals("Individual ID") && ! fileHeader[offset].equals("#Individual ID") ) {
sample = st.nextToken(); // kill nonstandard tokens
offset ++;
}
sampleNames.add(sample);
for (int i = offset+1; i < headerFieldCount ; i++)
st.nextToken();
int snpNumber = 0;
while ( snpNumber < vars.size() ) {
vars.get(snpNumber++).addGenotypeEntry(st.nextToken().split("\\s+"));
}
}
private int instantiateVariantListFromHeader(ArrayList<PlinkVariantInfo> seqVars, String header) {
// if the first line is not a comment (what we're used to seeing),
// then it's the raw header (comes from de-binary-ing a .bed file)
if ( !header.startsWith("#") )
throw new ReviewedStingException("Plink file is likely of .raw or recoded format. Please use an uncoded .ped file.");
plinkFileType = PlinkFileType.STANDARD_PED;
String[] headerFields = header.split("\t");
fileHeader = headerFields;
int skippedFields = 0;
for ( String field : headerFields ) {
if ( headerEntries.contains(field) )
skippedFields++;
else
// not a standard header, so a variant
seqVars.add(new PlinkVariantInfo(field));
}
return skippedFields;
}
/* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
* * PARSING BINARY PLINK BED/BIM/FAM FILES * *
* *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***/
private ArrayList<PlinkVariantInfo> parseBinaryFormattedPlinkFile(File file) {
PlinkBinaryTrifecta binaryFiles = getPlinkBinaryTriplet(file);
ArrayList<PlinkVariantInfo> parsedVariants = instantiateVariantsFromBimFile(binaryFiles.bimFile);
sampleNames = getSampleNameOrderingFromFamFile(binaryFiles.famFile);
ArrayList<PlinkVariantInfo> updatedVariants = getGenotypesFromBedFile(parsedVariants, binaryFiles.bedFile);
java.util.Collections.sort(updatedVariants);
return updatedVariants;
}
private PlinkBinaryTrifecta getPlinkBinaryTriplet(File file) {
// just gonna parse the name
PlinkBinaryTrifecta trifecta = new PlinkBinaryTrifecta();
String absolute_path = file.getAbsolutePath();
String[] directory_tree = absolute_path.split("/");
String file_name = directory_tree[directory_tree.length-1].split("\\.")[0];
StringBuilder pathBuilder = new StringBuilder();
for ( int i = 0; i < directory_tree.length - 1; i ++ ) {
pathBuilder.append(String.format("%s/",directory_tree[i]));
}
String path = pathBuilder.toString();
trifecta.bedFile = new File(path+file_name+".bed");
trifecta.bimFile = new File(path+file_name+".bim");
trifecta.famFile = new File(path+file_name+".fam");
return trifecta;
}
private ArrayList<PlinkVariantInfo> instantiateVariantsFromBimFile(File bimFile) {
BufferedReader reader;
try {
reader = new BufferedReader( new FileReader( bimFile ));
} catch ( FileNotFoundException e) {
throw new ReviewedStingException("The SNP information file accompanying the binary ped file was not found (the .bim file). "+
"Please ensure that it is in the same directory as the .bed and .fam files. The file we "+
"Were looking for was "+bimFile.getAbsolutePath(),e);
}
ArrayList<PlinkVariantInfo> variants = new ArrayList<PlinkVariantInfo>();
try {
String line = reader.readLine();
while ( line != null ) {
String[] snpInfo = line.split("\\s+");
PlinkVariantInfo variant = new PlinkVariantInfo(snpInfo[1]);
variant.setGenomeLoc(GenomeLocParser.parseGenomeLoc(snpInfo[0],Long.valueOf(snpInfo[3]), Long.valueOf(snpInfo[3])));
variant.setAlleles(snpInfo[4],snpInfo[5]);
variants.add(variant);
line = reader.readLine();
}
} catch ( IOException e ) {
throw new ReviewedStingException("There was an error reading the .bim file "+bimFile.getAbsolutePath(), e);
}
return variants;
}
private static ArrayList<String> getSampleNameOrderingFromFamFile(File famFile) {
BufferedReader reader;
try {
reader = new BufferedReader( new FileReader( famFile ));
} catch ( FileNotFoundException e) {
throw new ReviewedStingException("The Family information file accompanying the binary ped file was not found (the .fam file). "+
"Please ensure that it is in the same directory as the .bed and .bim files. The file we "+
"Were looking for was "+famFile.getAbsolutePath(),e);
}
ArrayList<String> sampleNames = new ArrayList<String>();
try {
String line;
do {
line = reader.readLine();
if ( line != null ) {
sampleNames.add(line.split("\\s+")[1]);
}
} while ( line != null );
} catch (IOException e) {
throw new ReviewedStingException("There was an error reading the .fam file "+famFile.getAbsolutePath(),e);
}
return sampleNames;
}
private ArrayList<PlinkVariantInfo> getGenotypesFromBedFile(ArrayList<PlinkVariantInfo> variants, File bedFile) {
FileInputStream inStream;
try {
inStream = new FileInputStream(bedFile);
} catch (FileNotFoundException e) {
throw new ReviewedStingException("The Binary pedigree file file accompanying the family file was not found (the .bed file). "+
"Please ensure that it is in the same directory as the .bim and .fam files. The file we "+
"Were looking for was "+bedFile.getAbsolutePath(),e);
}
try {
byte genotype;
long bytesRead = 0;
int snpOffset = 0;
int sampleOffset = 0;
boolean snpMajorMode = true;
do {
genotype = (byte) inStream.read();
bytesRead++;
if ( genotype != -1 ) {
if ( bytesRead > 3 ) {
addGenotypeByte(genotype,variants,snpOffset,sampleOffset, snpMajorMode);
if ( snpMajorMode ) {
sampleOffset = sampleOffset + 4;
if ( sampleOffset > sampleNames.size() -1 ) {
snpOffset ++;
sampleOffset = 0;
}
} else {
snpOffset = snpOffset + 4;
if ( snpOffset > variants.size() -1 ) {
sampleOffset ++;
snpOffset = 0;
}
}
} else {
if ( bytesRead == 3) {
snpMajorMode = genotype == SNP_MAJOR_MODE;
}
}
}
} while ( genotype != -1 );
} catch ( IOException e) {
throw new ReviewedStingException("Error reading binary ped file "+bedFile.getAbsolutePath(), e);
}
return variants;
}
private void addGenotypeByte(byte genotype, ArrayList<PlinkVariantInfo> variants, int snpOffset, int sampleOffset, boolean major) {
// four genotypes encoded in this byte
int[] genotypes = parseGenotypes(genotype);
for ( int g : genotypes ) {
variants.get(snpOffset).addBinaryGenotypeEntry(g);
if ( major ) {
sampleOffset++;
if ( sampleOffset > sampleNames.size()-1 ) {// using offsets for comparison; size 5 == offset 4
return;
}
} else {
snpOffset++;
if( snpOffset > variants.size()-1 ) {
return;
}
}
}
}
private int[] parseGenotypes(byte genotype) {
int[] genotypes = new int[4];
genotypes[0] = ( genotype & 3 );
genotypes[1] = ( ( genotype & 12 ) >>> 2 );
genotypes[2] = ( ( genotype & 48 ) >>> 4 );
genotypes[3] = ( ( genotype & 192 ) >>> 6 );
return genotypes;
}
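// Worked example (illustrative): the byte 0b11011000 (0xD8) unpacks, two bits at a time
// starting from the least-significant pair, to {0, 2, 1, 3}; under the mapping in
// addBinaryGenotypeEntry below, that is hom allele1, het, no-call, hom allele2.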
class PlinkVariantInfo implements Comparable {
private String variantName;
private GenomeLoc loc;
// the list of genotypes in the same order as in sampleNames (using a map here is inefficient)
private List<Allele[]> genotypes = new ArrayList<Allele[]>();
// map of Alleles seen (so that we can share Allele objects among samples)
HashMap<String, Allele> alleles = new HashMap<String, Allele>(4);
// for indels
private boolean isIndel = false;
private boolean isInsertion = false;
private int length = 1;
// for binary parsing
private String locAllele1;
private String locAllele2;
public PlinkVariantInfo(String variantName) {
this.variantName = variantName;
parseName();
}
public GenomeLoc getLocation() {
return loc;
}
public String getName() {
return variantName;
}
public Map<String, Allele[]> getGenotypes() {
Map<String, Allele[]> genotypeMap = new HashMap<String, Allele[]>();
int index = 0;
for ( Allele[] myAlleles : genotypes )
genotypeMap.put(sampleNames.get(index++), myAlleles);
return genotypeMap;
}
public boolean isIndel() {
return isIndel;
}
public boolean isInsertion() {
return isInsertion;
}
public int getLength() {
return length;
}
public void setGenomeLoc(GenomeLoc loc) {
this.loc = loc;
}
private void parseName() {
int chromIdx = variantName.indexOf("|c");
if ( chromIdx == -1 )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c...)");
String[] pieces = variantName.substring(chromIdx+2).split("_");
if ( pieces.length < 2 )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
String chrom = pieces[0].trim();
if ( pieces[1].charAt(0) != 'p' )
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)");
String pos = pieces[1].substring(1).trim();
loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos);
if ( pieces.length > 2 && (pieces[2].startsWith("gI") || pieces[2].startsWith("gD")) ) {
// it's an indel
isIndel = true;
isInsertion = pieces[2].startsWith("gI");
try {
// length of insertion on reference is still 1
if ( !isInsertion )
length = Integer.parseInt(pieces[2].substring(2));
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p..._g[I/D][length])");
}
}
}
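// Examples (illustrative): "rs123|c1_p12345_gD4" parses to contig 1, position 12345,
// a deletion of length 4; "rs456|c2_p999" is a simple SNP at 2:999.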
public void setAlleles(String al1, String al2) {
if ( al1.equals(PlinkRod.SEQUENOM_NO_CALL) ) {
// encoding for a site at which no variants were detected
locAllele1 = al2;
} else {
locAllele1 = al1;
}
locAllele2 = al2;
}
public void addGenotypeEntry(String[] alleleStrings) {
Allele[] myAlleles = new Allele[2];
for (int i = 0; i < 2; i++) {
if ( alleleStrings.length <= i ) {
myAlleles[i] = null;
continue;
}
String alleleString = alleleStrings[i];
Allele allele;
if ( alleles.containsKey(alleleString) ) {
allele = alleles.get(alleleString);
} else {
if ( PlinkRod.SEQUENOM_NO_CALL.equals(alleleString) )
allele = Allele.NO_CALL;
else
allele = Allele.create(alleleString);
alleles.put(alleleString, allele);
}
myAlleles[i] = allele;
}
genotypes.add(myAlleles);
}
public void addBinaryGenotypeEntry(int genotype) {
String[] alleleStr = new String[2];
if ( genotype == 0 ) {
alleleStr[0] = locAllele1;
alleleStr[1] = locAllele1;
} else if ( genotype == 2 ) {
alleleStr[0] = locAllele1;
alleleStr[1] = locAllele2;
} else if ( genotype == 3 ) {
alleleStr[0] = locAllele2;
alleleStr[1] = locAllele2;
} else {
alleleStr[0] = "0";
alleleStr[1] = "0";
}
addGenotypeEntry(alleleStr);
}
public int compareTo(Object obj) {
if ( ! ( obj instanceof PlinkVariantInfo) ) {
return 1;
}
return loc.compareTo(((PlinkVariantInfo) obj).getLocation());
}
}
}
class PlinkBinaryTrifecta {
public PlinkBinaryTrifecta() {}
public File bedFile;
public File bimFile;
public File famFile;
}

View File

@@ -1,358 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.apache.log4j.Logger;
/**
* Class for representing arbitrary reference ordered data sets
*
* User: mdepristo
* Date: Feb 27, 2009
* Time: 10:47:14 AM
*
* System for interacting with tabular formatted data of the following format:
*
* # comment line
* # must include HEADER KEYWORD
* HEADER COL1 ... COLN
* chr:pos data1 ... dataN
*
* The system supports the rod interface. You can just access tabularRODs through the normal ROD system.
*
* You can also write your own files, as such:
*
* ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
* assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
* String rowData = String.format("%d %d %d", 1, 2, 3);
* TabularROD row = new TabularROD("myName", header, GenomeLoc.parseGenomeLoc("chrM", 1), rowData.split(" "));
* assertTrue(row.toString().equals("chrM:1\t1\t2\t3"));
*/
public class TabularROD extends BasicReferenceOrderedDatum implements Map<String, String> {
private static Logger logger = Logger.getLogger(TabularROD.class);
protected GenomeLoc loc;
private HashMap<String, String> attributes;
private ArrayList<String> header;
public static String DEFAULT_DELIMITER = "\t";
public static String DEFAULT_DELIMITER_REGEX = "\\s+";
public static String DELIMITER = DEFAULT_DELIMITER;
public static String DELIMITER_REGEX = DEFAULT_DELIMITER_REGEX;
private static int MAX_LINES_TO_LOOK_FOR_HEADER = 1000;
private static Pattern HEADER_PATTERN = Pattern.compile("^\\s*((HEADER)|(loc)|(rs#)).*");
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static int parsedRecords = 0;
final static boolean printRecordsParsed = false;
/**
* Set the global tabular ROD delimiter and the regex to split the delimiter.
*
* The delimiter to put between fields, while the regex is used to split lines
*
* @param delimiter
* @param delimeterRegex
*/
public static void setDelimiter(final String delimiter, final String delimeterRegex) {
DELIMITER = delimiter;
DELIMITER_REGEX = delimeterRegex;
}
/**
* Returns a parsable string representation for the
* @param header
*/
public static String headerString(final ArrayList<String> header) {
requireGoodHeader(header);
return Utils.join(DELIMITER, header);
}
/**
* Returns a comment line containing the *single line* string msg
*
* @param msg
* @return
*/
public static String commentString(final String msg) {
return "# " + msg;
}
private static boolean headerIsGood(final ArrayList<String> header) {
if ( header.size() == 0 ) return false;
//if ( ! header.get(0).equals("HEADER") ) return false;
return true;
}
private static void requireGoodHeader(final ArrayList<String> header) {
if ( ! headerIsGood(header) )
throw new RuntimeException("Header must begin with HEADER keyword");
}
// ----------------------------------------------------------------------
//
// Constructors
//
// ----------------------------------------------------------------------
public TabularROD(final String name) {
super(name);
attributes = new HashMap<String, String>();
}
public TabularROD(final String name, ArrayList<String> header) {
this(name);
this.header = header;
}
/**
* Make a new TabularROD with name, using header columns header, at loc, without any bound data. Data
* must be bound to each corresponding header[i] field before the object is really usable.
*
* @param name
* @param header
* @param loc
*/
public TabularROD(final String name, ArrayList<String> header, GenomeLoc loc) {
this(name);
this.header = header;
this.loc = loc;
requireGoodHeader(this.header);
}
/**
* Make a new TabularROD with name, using header columns header, at loc, with data associated with the
* header columns. data and header are assumed to be in the same order, and bindings will be established
* from header[i] = data[i]. The TabularROD at this stage can be printed, manipulated, it is considered
* a full fledged, initialized object.
*
* @param name
* @param header
* @param loc
* @param data
*/
public TabularROD(final String name, ArrayList<String> header, GenomeLoc loc, String[] data) {
this(name, header, loc);
if ( header.size() != data.length + 1 )
throw new RuntimeException(String.format("Incorrect tabular data format: header has %d columns but %d data elements were provided: %s",
header.size(), data.length, Utils.join("\t", data)));
for ( int i = 0; i < data.length; i++ ) {
put(header.get(i+1), data[i]);
}
}
/**
* Walks through the source files looking for the header line, which it returns as a
* list of strings.
*
* @param source
* @return
*/
public Object initialize(final File source) throws FileNotFoundException {
return readHeader(source);
}
/**
* By default, all records (i.e. RODs) must contain the same number of fields as specified
* by the header. Subclasses should override this method to disable this requirement.
*
* @return true if incomplete records are allowed; false otherwise
*/
public boolean allowIncompleteRecords() {
return false;
}
public static ArrayList<String> readHeader(final File source) throws FileNotFoundException {
ArrayList<String> header = null;
int linesLookedAt = 0;
XReadLines reader = new XReadLines(source);
for ( String line : reader ) {
Matcher m = HEADER_PATTERN.matcher(line);
if ( m.matches() ) {
//System.out.printf("Found a header line: %s%n", line);
header = new ArrayList<String>(Arrays.asList(line.split(DELIMITER_REGEX)));
//System.out.printf("HEADER IS %s%n", Utils.join(":", header));
}
if ( linesLookedAt++ > MAX_LINES_TO_LOOK_FOR_HEADER )
break;
}
// check that we found the header
if ( header != null ) {
logger.debug(String.format("HEADER IS %s%n", Utils.join(":", header)));
} else {
// use the indexes as the header fields
logger.debug("USING INDEXES FOR ROD HEADER");
// reset if necessary
if ( !reader.hasNext() )
reader = new XReadLines(source);
header = new ArrayList<String>();
int tokens = reader.next().split(DELIMITER_REGEX).length;
for ( int i = 0; i < tokens; i++)
header.add(Integer.toString(i));
}
try {
reader.close();
} catch ( IOException e ) {
throw new RuntimeException(e);
}
return header;
}
// ----------------------------------------------------------------------
//
// ROD accessors
//
// ----------------------------------------------------------------------
public GenomeLoc getLocation() {
if ( loc != null )
return loc;
String s = get(header.get(0));
if ( s == null )
return null;
return GenomeLocParser.parseGenomeLoc(s);
}
public ArrayList<String> getHeader() {
return header;
}
public String get(final Object key) {
return attributes.get(key);
}
public String put(final String key, final String object) {
return attributes.put(key, object);
}
public boolean containsKey(final Object key) {
return attributes.containsKey(key);
}
public HashMap<String,String> getAttributes() {
return attributes;
}
public String getAttributeString() {
List<String> strings = new ArrayList<String>(attributes.size());
for ( String key : header ) {
if ( containsKey(key) ) { // avoid the header
strings.add(this.get(key));
//System.out.printf("Adding %s%n", this.get(key));
}
}
return Utils.join("\t", strings);
}
// ----------------------------------------------------------------------
//
// map functions
//
// ----------------------------------------------------------------------
public int size() { return attributes.size(); }
public boolean isEmpty() { return attributes.isEmpty(); }
public boolean containsValue(Object o) { return attributes.containsValue(o); }
public String remove(Object o) { return attributes.remove(o); }
public void clear() { attributes.clear(); }
public java.util.Set<String> keySet() { return attributes.keySet(); }
public java.util.Collection<String> values() { return attributes.values(); }
public void putAll(java.util.Map<? extends String, ? extends String> map) {
attributes.putAll(map);
}
public java.util.Set<java.util.Map.Entry<String,String>> entrySet() {
return attributes.entrySet();
}
// ----------------------------------------------------------------------
//
// formatting
//
// ----------------------------------------------------------------------
public String toString() {
if ( loc != null )
return String.format("%s\t%s", loc, getAttributeString());
return String.format("%s", getAttributeString());
}
/**
* The delimiter regular expression that should be used to separate fields in data rows
* and header.
*
* @return
*/
public String delimiterRegex() {
return DELIMITER_REGEX;
}
/**
* Used by ROD management system to set the data in this ROD associated with a line in a rod
*
* @param headerObj
* @param parts
* @return
* @throws IOException
*/
public boolean parseLine(final Object headerObj, final String[] parts) throws IOException {
header = (ArrayList<String>)(headerObj);
//System.out.printf("parts [len=%d] is '%s'%n", parts.length, Utils.join(":", parts));
if ( parts.length == 0 || COMMENT_PATTERN.matcher(parts[0]).matches() || HEADER_PATTERN.matcher(parts[0]).matches() )
return false;
if ( !allowIncompleteRecords() && header.size() != parts.length) {
throw new IOException(String.format("Header length %d not equal to Tabular parts length %d", header.size(), parts.length));
}
for ( int i = 0; i < parts.length; i++ ) {
put(header.get(i), parts[i]);
}
if ( printRecordsParsed ) System.out.printf("Parsed %d records %s%n", ++parsedRecords, this);
return true;
}
}

View File

@@ -42,10 +42,8 @@ public class VariantContextAdaptors {
static {
adaptors.put(DbSNPFeature.class, new DBSnpAdaptor());
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
adaptors.put(HapMapFeature.class, new HapMapAdaptor());
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
adaptors.put(rodGELI.class, new GeliAdaptor());
adaptors.put(VariantContext.class, new VariantContextAdaptor());
}
@@ -134,96 +132,6 @@ public class VariantContextAdaptors {
return new VCFHeader(hInfo == null ? new HashSet<VCFHeaderLine>() : hInfo, names);
}
// --------------------------------------------------------------------------------------------------------------
//
// Plink to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
private static class PlinkRodAdaptor extends VCAdaptor {
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
VariantContext convert(String name, Object input, ReferenceContext ref) {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from Plink to VariantContext requires a reference context");
PlinkRod plink = (PlinkRod)input;
HashSet<Allele> VCAlleles = new HashSet<Allele>();
Allele refAllele = determineRefAllele(plink, ref);
VCAlleles.add(refAllele);
// mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference)
HashMap<Allele, Allele> plinkAlleleToVCAllele = new HashMap<Allele, Allele>();
Set<Genotype> genotypes = new HashSet<Genotype>();
Map<String, Allele[]> genotypeSets = plink.getGenotypes();
// We need to iterate through this list and recreate the Alleles since the
// PlinkRod does not annotate any of the Alleles as reference.
// for each sample...
for ( Map.Entry<String, Allele[]> genotype : genotypeSets.entrySet() ) {
ArrayList<Allele> myVCAlleles = new ArrayList<Allele>(2);
// for each allele...
for ( Allele myPlinkAllele : genotype.getValue() ) {
Allele VCAllele;
if ( myPlinkAllele.isNoCall() ) {
VCAllele = Allele.NO_CALL;
} else {
if ( plinkAlleleToVCAllele.containsKey(myPlinkAllele) ) {
VCAllele = plinkAlleleToVCAllele.get(myPlinkAllele);
} else {
if ( !plink.isIndel() ) {
VCAllele = Allele.create(myPlinkAllele.getBases(), refAllele.equals(myPlinkAllele, true));
} else if ( myPlinkAllele.isNull() ) {
VCAllele = Allele.create(Allele.NULL_ALLELE_STRING, plink.isInsertion());
} else {
VCAllele = Allele.create(myPlinkAllele.getBases(), !plink.isInsertion());
}
plinkAlleleToVCAllele.put(myPlinkAllele, VCAllele);
VCAlleles.add(VCAllele);
}
}
myVCAlleles.add(VCAllele);
}
// create the genotype
genotypes.add(new Genotype(genotype.getKey(), myVCAlleles));
}
// create the variant context
try {
GenomeLoc loc = GenomeLocParser.setStop(plink.getLocation(), plink.getLocation().getStop() + plink.getLength()-1);
VariantContext vc = VariantContextUtils.toVC(plink.getVariantName(), loc, VCAlleles, genotypes);
return vc;
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than once in your ped file");
}
}
private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) {
Allele refAllele;
if ( !plink.isIndel() ) {
refAllele = Allele.create(ref.getBase(), true);
} else if ( plink.isInsertion() ) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
} else {
long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop();
if ( plink.getLength() > maxLength )
throw new UnsupportedOperationException("Plink conversion currently can only handle indels up to length " + maxLength);
refAllele = deletionAllele(ref, 1, plink.getLength());
}
return refAllele;
}
}
// --------------------------------------------------------------------------------------------------------------
//
// GELI to VariantContext
@@ -291,78 +199,6 @@ }
}
}
// --------------------------------------------------------------------------------------------------------------
//
// GELI to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
private static class GeliAdaptor extends VCAdaptor {
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @param ref the reference context
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, ReferenceContext ref) {
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase()),true) )
return null;
Allele refAllele = Allele.create(String.valueOf((char)geli.getReferenceBase()), true);
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()),false)) {
return null;
}
Allele allele1 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
// add the two alt alleles
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()),false)) {
return null;
}
Allele allele2 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
if (!alleles.contains(allele2) && !allele2.equals(refAllele, true)) alleles.add(allele2);
Map<String, String> attributes = new HashMap<String, String>();
Collection<Genotype> genotypes = new ArrayList<Genotype>();
MutableGenotype call = new MutableGenotype(name, alleles);
// setup the genotype likelihoods
double[] post = new double[10];
String[] gTypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
for (int x = 0; x < 10; x++)
post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1))));
// set the likelihoods, depth, and RMS mapping quality values
//call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post);
//call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality());
//call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads());
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
VariantContext vc = VariantContextUtils.toVC(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
return vc;
}
}
// --------------------------------------------------------------------------------------------------------------
//
// HapMap to VariantContext

View File

@@ -0,0 +1,63 @@
package org.broadinstitute.sting.gatk.refdata.features.table;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.IOException;
import java.util.*;
/**
* implementation of a simple table (whitespace/tab delimited) input file format... more improvements to come
*/
public class TableCodec implements FeatureCodec {
private String delimiterRegex = "\\s+";
private String headerDelimiter = "HEADER";
private String commentDelimiter = "#";
private ArrayList<String> header = new ArrayList<String>();
@Override
public Feature decodeLoc(String line) {
return decode(line);
}
@Override
public Feature decode(String line) {
if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter))
return null;
String[] split = line.split(delimiterRegex);
if (split.length < 1)
throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format");
return new TableFeature(GenomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header);
}
@Override
public Class getFeatureType() {
return TableFeature.class;
}
@Override
public Object readHeader(LineReader reader) {
String line = "";
try {
while ((line = reader.readLine()) != null) {
if (line.startsWith(headerDelimiter)) {
if (header.size() > 0) throw new IllegalStateException("Input table file seems to have two header lines; the second is: " + line);
String spl[] = line.split(delimiterRegex);
for (String s : spl) header.add(s);
} else if (!line.startsWith(commentDelimiter)) {
break;
}
}
} catch (IOException e) {
throw new UserException.MalformedFile("unable to parse header from TableCodec file",e);
}
return header;
}
}

View File

@@ -0,0 +1,56 @@
package org.broadinstitute.sting.gatk.refdata.features.table;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
* A feature representing a single row out of a text table
*/
public class TableFeature implements Feature {
// stores the values for the columns, separated out
private final List<String> values;
// if we have column names, we store them here
private final List<String> keys;
// our location
private final GenomeLoc position;
public TableFeature(GenomeLoc position, List<String> values, List<String> keys) {
this.values = values;
this.keys = keys;
this.position = position;
}
@Override
public String getChr() {
return position.getContig();
}
@Override
public int getStart() {
return (int)position.getStart();
}
@Override
public int getEnd() {
return (int)position.getStop();
}
public String getValue(int columnPosition) {
if (columnPosition >= values.size()) throw new IllegalArgumentException("We only have " + values.size() + " columns; the requested column = " + columnPosition);
return values.get(columnPosition);
}
public String get(String columnName) {
int position = keys.indexOf(columnName);
if (position < 0) throw new IllegalArgumentException("We don't have a column named " + columnName);
return values.get(position);
}
public GenomeLoc getLocation() {
return this.position;
}
}
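
A usage sketch for the two new classes (the AsciiLineReader construction, file name, and input line are assumptions, not taken from this commit; parsing locations also requires the usual GenomeLocParser contig setup, and checked-exception handling is elided):

    TableCodec codec = new TableCodec();
    LineReader reader = new AsciiLineReader(new FileInputStream("my.table")); // assumed Tribble reader
    List<String> header = (List<String>) codec.readHeader(reader);  // consumes HEADER/# lines
    TableFeature row = (TableFeature) codec.decode("1:1105290\t40\t0.995");
    String contig = row.getChr();          // "1"
    String secondColumn = row.getValue(1); // "40" (column 0 holds the location string)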

View File

@@ -1,162 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import java.io.IOException;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import net.sf.samtools.util.CloseableIterator;
/**
* This class wraps Picard Geli chip data and presents it as a ROD.
*/
public class rodGELI extends BasicReferenceOrderedDatum {
// ----------------------------------------------------------------------
//
// Constructors
//
// ----------------------------------------------------------------------
private GenotypeLikelihoods gh = null;
public rodGELI(final String name, GenotypeLikelihoods gh) {
super(name);
this.gh = gh;
}
@Override
public GenomeLoc getLocation() {
return GenomeLocParser.createGenomeLoc(gh.getSequenceIndex(), gh.getPosition());
}
public GenotypeLikelihoods getGeliRecord() {
return gh;
}
/** Required by ReferenceOrderedDatum interface. This implementation provides its own iterator,
* so this method does nothing at all (always returns false).
*
*/
@Override
public boolean parseLine(Object header, String[] parts) throws IOException {
return false;
}
@Override
public String toString() {
final StringBuilder builder = new StringBuilder();
builder.append(gh.getSequenceName() + "\t");
builder.append(gh.getPosition() + "\t");
builder.append(Character.toString((char)(gh.getReferenceBase() & 0xff)) + "\t");
builder.append(gh.getNumReads() + "\t");
builder.append(gh.getMaxMappingQuality() + "\t");
builder.append(gh.getBestGenotype().name() + "\t");
builder.append(gh.getBestToReferenceLod() + "\t");
builder.append(gh.getBestToSecondBestLod() + "\t");
builder.append("\t"); // no dbSNP info in GenotypeLikelihoods class
for (final DiploidGenotype genotype : DiploidGenotype.values())
builder.append(gh.getLikelihood(genotype) + "\t");
builder.append("\n");
return builder.toString();
}
private static class rodGELIIterator implements Iterator<rodGELI> {
private String rodName = null;
private GeliFileReader parser = null;
private CloseableIterator<GenotypeLikelihoods> iterator = null;
rodGELIIterator(String name, File f) {
rodName = name;
parser = new GeliFileReader(f);
iterator = parser.iterator();
}
public boolean hasNext() {
return iterator.hasNext();
}
/**
* @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements.
*/
public rodGELI next() {
if (!this.hasNext()) throw new NoSuchElementException("RodGELI next called on iterator with no more elements");
return new rodGELI(rodName, iterator.next());
}
public void remove() {
throw new UnsupportedOperationException("'remove' operation is not supported for GELIs");
}
}
public static Iterator<rodGELI> createIterator(String name, File file) {
return new rodGELI.rodGELIIterator(name,file);
}
/****
* We no longer want to use this class as truth data (sigh)
*
public List<String> getFWDAlleles() {
return new ArrayList<String>();
}
public String getFWDRefBases() {
return "";
}
public char getRef() {
return (char)(gh.getReferenceBase() & 0xff);
}
public boolean isPointGenotype() { return true; }
public boolean isIndelGenotype() { return false; }
public boolean isSNP() { return true; }
public boolean isReference() { return gh.isHomozygousReference(); }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
public boolean isBiallelic() { return false; }
public boolean isHom() { return gh.isHomozyous(); }
public boolean isHet() { return !isHom(); }
public double getVariantConfidence() {
return gh.getBestToReferenceLod();
}
public double getConsensusConfidence() {
return gh.getBestToSecondBestLod();
}
*
*/
public static void main(String argv[]) {
String testFile = "NA12878.geli";
Iterator<rodGELI> it = createIterator("test-geli", new File(testFile));
net.sf.picard.reference.ReferenceSequenceFileWalker reference = new net.sf.picard.reference.ReferenceSequenceFileWalker(new File( "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
if ( reference.getSequenceDictionary() == null ) {
System.out.println("No reference sequence dictionary found. Abort.");
System.exit(1);
}
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
int counter = 0;
while ( it.hasNext() && counter < 500 ) {
rodGELI rg = it.next();
System.out.println(rg.toString());
counter++;
}
}
}

View File

@@ -25,9 +25,16 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.source.BasicFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Iterator;
@@ -39,7 +46,7 @@ import java.util.Iterator;
* <p/>
* the basics of what a reference metadata track must contain.
*/
public abstract class RMDTrack {
public class RMDTrack {
// the basics of a track:
private final Class type; // our type
@@ -47,20 +54,14 @@
private final String name; // the name
private final File file; // the associated file we create the reader from
/**
* Create a track
*
* @param type the type of track, used for track lookup
* @param recordType the type of record produced
* @param name the name of this specific track
* @param file the associated file, for reference or recreating the reader
*/
protected RMDTrack(Class type, Class recordType, String name, File file) {
this.type = type;
this.recordType = recordType;
this.name = name;
this.file = file;
}
// our feature reader - allows queries
private BasicFeatureSource reader;
// our sequence dictionary, which can be null
private final SAMSequenceDictionary dictionary;
// our codec type
private final FeatureCodec codec;
public Class getType() {
return type;
@@ -78,12 +79,6 @@
return recordType;
}
/**
* @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface
*/
public abstract CloseableIterator<GATKFeature> getIterator();
/**
* helper function for determining if we are the same track based on name and record type
*
@@ -96,21 +91,90 @@
return (name.equals(this.name) && (type.getClass().isAssignableFrom(this.type.getClass())));
}
/**
* Create a track
*
* @param type the type of track, used for track lookup
* @param name the name of this specific track
* @param file the associated file, for reference or recreating the reader
* @param reader the feature reader to use as the underlying data source
* @param dict the sam sequence dictionary
* @param codec the feature codec we use to decode this type
*/
public RMDTrack(Class type, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
this.type = type;
this.recordType = codec.getFeatureType();
this.name = name;
this.file = file;
this.reader = reader;
this.dictionary = dict;
this.codec = codec;
}
/**
* @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface
*/
public CloseableIterator<GATKFeature> getIterator() {
try {
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e);
}
}
/**
* do we support the query interface?
* @return true if we can be cast to the QueryableTrack interface
*
* @return true
*/
public abstract boolean supportsQuery();
public boolean supportsQuery() {
return true;
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
}
public void close() {
try {
reader.close();
} catch (IOException e) {
throw new UserException.MalformedFile("Unable to close reader " + reader.toString(),e);
}
reader = null;
}
public FeatureSource getReader() {
return reader;
}
/**
* get the sequence dictionary from the track, if available
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public SAMSequenceDictionary getSequenceDictionary() {
return null; // default, others can override this
return dictionary;
}
public Object getHeader() {
return null;
return reader.getHeader();
}
public FeatureCodec getCodec() {
return codec;
}
}
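
With RMDTrack now a concrete, always-queryable class, callers can query any Tribble-backed track directly; a minimal sketch (the track variable is assumed to come from RMDTrackBuilder, and process(...) is a hypothetical per-feature handler):

    if (track.supportsQuery()) {
        try {
            CloseableIterator<GATKFeature> features = track.query("1", 10000, 20000);
            while (features.hasNext())
                process(features.next());
            features.close();
        } catch (IOException e) {
            throw new UserException.CouldNotReadInputFile(track.getFile(), "query failed", e);
        }
    }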

View File

@@ -1,183 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.util.*;
/**
* Find the available track builders, and create the requisite tracks from the command line.
*
* In Tribble RMD tracks have two classes:
* - a Feature that is the model/view for the data
* - a Codec that is the controller to generate the Feature.
*
* In this class, the track types are the Codecs. The track record types are the Features.
*/
public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
// the input strings we use to create RODs from
List<RMDTriplet> inputs = new ArrayList<RMDTriplet>();
// create an active mapping of builder instances, and a map of the name -> class for convenience
/** the tracks that are available to us, associated with their builder */
Map<String, RMDTrackBuilder> availableTrackBuilders;
/** the class names, with their class descriptions (think the Controller Codecs) */
Map<String, Class> availableTrackTypes;
/** the available track record types (think the Model/View Features) */
Map<String, Class> availableTrackRecordTypes;
/** Create a new track plugin manager. */
public RMDTrackManager() {
super(RMDTrackBuilder.class, "TrackBuilders", null);
}
/**
* find the associated reference meta data
*
* @param bindings the bindings of strings from the -B command line option
*
* @return a list of RMDTracks, one for each -B option
*/
public List<RMDTrack> getReferenceMetaDataSources(GenomeAnalysisEngine engine,List<String> bindings) {
initializeTrackTypes();
initializeBindings(engine,bindings);
// try and make the tracks given their requests
return createRequestedTrackObjects();
}
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
initializeTrackTypes();
initializeTrackRecordTypes();
Set<String> names = new TreeSet<String>();
for (Map.Entry<String, Class> availableTrackRecordType: availableTrackRecordTypes.entrySet()) {
if (trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
/**
* initialize our lists of bindings
* @param engine The engine, used to populate tags.
* @param bindings the input to the GATK, as a list of strings passed in through the -B options
*/
private void initializeBindings(GenomeAnalysisEngine engine,List<String> bindings) {
// NOTE: This method behaves like a static initializer: once the inputs have been passed in, they are locked in.
if (inputs.size() > 0 || bindings.size() == 0)
return;
for (String binding: bindings) {
if(engine != null && engine.getTags(binding).size() == 2) {
// Assume that if tags are present, those tags are name and type.
// Name is always first, followed by type.
List<String> parameters = engine.getTags(binding);
String name = parameters.get(0);
String type = parameters.get(1);
inputs.add(new RMDTriplet(name,type,binding));
}
else {
// Otherwise, use old-format bindings.
String[] split = binding.split(",");
if (split.length != 3) throw new IllegalArgumentException(binding + " is not a valid reference metadata track description");
inputs.add(new RMDTriplet(split[0], split[1], split[2]));
}
}
}
/**
* initialize our lists of tracks and builders
*/
private void initializeTrackTypes() {
if (availableTrackBuilders != null && availableTrackTypes != null)
return;
// create an active mapping of builder instances, and a map of the name -> class for convenience
availableTrackBuilders = new HashMap<String, RMDTrackBuilder>();
availableTrackTypes = new HashMap<String, Class>();
createBuilderObjects();
}
/**
* create the builder objects from the retrieved list
*/
private void createBuilderObjects() {
// create a track builder instance for each track builder, and find out what tracks we can make
for (String builderName : this.pluginsByName.keySet()) {
RMDTrackBuilder builder = this.createByName(builderName);
Map<String, Class> mapping = builder.getAvailableTrackNamesAndTypes();
for (String name : mapping.keySet()) {
availableTrackBuilders.put(name.toUpperCase(), builder);
availableTrackTypes.put(name.toUpperCase(), mapping.get(name));
}
}
}
/**
* initialize our list of track record types
*/
private void initializeTrackRecordTypes() {
if (availableTrackRecordTypes != null)
return;
availableTrackRecordTypes = new HashMap<String, Class>();
for (RMDTrackBuilder builder : availableTrackBuilders.values()) {
Map<String, Class> mapping = builder.getAvailableTrackNamesAndRecordTypes();
for (String name : mapping.keySet()) {
availableTrackRecordTypes.put(name.toUpperCase(), mapping.get(name));
}
}
}
/**
* create the requested track objects
*
* @return a list of the tracks, one for each of the requested input tracks
*/
private List<RMDTrack> createRequestedTrackObjects() {
// create live instances of the tracks
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
// create instances of each of the requested types
for (RMDTriplet trip : inputs) {
RMDTrackBuilder b = availableTrackBuilders.get(trip.getType().toUpperCase());
if (b == null) throw new UserException.CommandLineException("Unable to find track for " + trip.getType());
tracks.add(b.createInstanceOfTrack(availableTrackTypes.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile())));
}
return tracks;
}
}
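For readers tracing the binding logic above before it moves into RMDTrackBuilder: a -B argument arrives either tagged (engine-supplied name and type) or as an old-format "name,type,file" triplet. A hedged sketch of just the triplet parse, using a local stand-in class rather than the real RMDTriplet; the sample binding is made up:

public class BindingParseExample {
    // stand-in for the real RMDTriplet (name/type/file accessors only)
    static final class Triplet {
        final String name, type, file;
        Triplet(String name, String type, String file) {
            this.name = name; this.type = type; this.file = file;
        }
    }

    // mirrors the old-format branch of initializeBindings()
    static Triplet parse(String binding) {
        String[] split = binding.split(",");
        if (split.length != 3)
            throw new IllegalArgumentException(binding + " is not a valid reference metadata track description");
        return new Triplet(split[0], split[1], split[2]);
    }

    public static void main(String[] args) {
        Triplet t = parse("dbsnp,DbSNP,/path/to/dbsnp.rod"); // hypothetical binding
        System.out.println(t.name + " (" + t.type + ") from " + t.file);
    }
}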

View File

@ -1,82 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
import java.io.File;
import java.util.Iterator;
/**
*
* @author aaron
*
* Class RODRMDTrack
*
* wrap a reference ordered data object in the new track style. This track will hopefully be phased out as we move to
* a FeatureReader-based system.
*/
public class RODRMDTrack extends RMDTrack {
// our ROD
private ReferenceOrderedData data;
/**
* Create a track
*
* @param type the type of track, used for track lookup
* @param name the name of this specific track
* @param file the associated file, for reference or recreating the reader
* @param data the ROD to use as the underlying data source for this track
*/
public RODRMDTrack(Class type, String name, File file, ReferenceOrderedData data) {
super(type, type, name, file); // the decoder type and the record type are the same in this case
this.data = data;
}
/**
* @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface
*/
@Override
public CloseableIterator<GATKFeature> getIterator() {
return new GATKFeatureIterator(data.iterator());
}
/**
* do we support the query interface?
*
* @return false
*/
@Override
public boolean supportsQuery() {
return false; // sadly no, we don't
}
}

View File

@ -1,142 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.source.BasicFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException;
/**
*
* @author aaron
*
* Class TribbleTrack
*
* A feature reader track, implementing the RMDTrack for tracks that are generated out of Tribble
*/
public class TribbleTrack extends RMDTrack implements QueryableTrack {
// our feature reader - allows queries
private BasicFeatureSource reader;
// our sequence dictionary, which can be null
private final SAMSequenceDictionary dictionary;
// our codec type
private final FeatureCodec codec;
/**
* Create a track
*
* @param type the type of track, used for track lookup
* @param name the name of this specific track
* @param file the associated file, for reference or recreating the reader
* @param reader the feature reader to use as the underlying data source
* @param dict the sam sequence dictionary
* @param codec the feature codec we use to decode this type
*/
public TribbleTrack(Class type, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
super(type, codec.getFeatureType(), name, file);
this.reader = reader;
this.dictionary = dict;
this.codec = codec;
}
/**
* @return how to get an iterator of the underlying data. This is all a track has to support,
* but other more advanced tracks support the query interface
*/
@Override
public CloseableIterator<GATKFeature> getIterator() {
try {
return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName());
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e);
}
}
/**
* do we support the query interface?
*
* @return true
*/
@Override
public boolean supportsQuery() {
return true;
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName());
}
public void close() {
try {
reader.close();
} catch (IOException e) {
throw new ReviewedStingException("Unable to close reader " + reader.toString(),e);
}
reader = null;
}
public FeatureSource getReader() {
return reader;
}
/**
* get the sequence dictionary from the track, if available
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public SAMSequenceDictionary getSequenceDictionary() {
return dictionary;
}
public Object getHeader() {
return reader.getHeader();
}
public FeatureCodec getCodec() {
return codec;
}
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2010. The Broad Institute
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
@ -11,41 +12,88 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.*;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.file.FSLockWithShared;
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.File;
import java.util.Map;
import java.io.*;
import java.util.*;
/**
* @author aaron
* <p/>
* Interface RMDTrackBuilder
* <p/>
* The basic interface for finding and parsing RMDTracks. Track builders present an interface that allows
* the track manager to find and create tracks of the specified type.
*
* @author aaron
*
* Class RMDTrackBuilder
*
* This class keeps track of the available codecs, and knows how to put together a track
* that gets its iterators from the FeatureReader using Tribble.
*
*/
public interface RMDTrackBuilder {
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
/** @return a list of all available tracks types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes();
// the input strings we use to create RODs from
List<RMDTriplet> inputs = new ArrayList<RMDTriplet>();
// the linear index extension
public static final String indexExtension = ".idx";
private Map<String, Class> classes = null;
/** Create a new plugin manager. */
public RMDTrackBuilder() {
super(FeatureCodec.class, "Codecs", "Codec");
}
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
classes = new HashMap<String, Class>();
for (String name: this.pluginsByName.keySet()) {
classes.put(name.toUpperCase(), pluginsByName.get(name));
}
return classes;
}
/** @return a list of all available track record types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndRecordTypes();
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
Map<String, Class> classToRecord = new HashMap<String, Class>();
for (String name: this.pluginsByName.keySet()) {
FeatureCodec codec = this.createByName(name);
classToRecord.put(name, codec.getFeatureType());
}
return classToRecord;
}
/**
* create a RMDTrack of the specified type
@ -55,7 +103,317 @@ public interface RMDTrackBuilder {
* @param inputFile the input file
*
* @return an instance of the track
* @throws RMDTrackCreationException if we don't know of the target class or we couldn't create it
* @throws RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException;
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
// return a feature reader track
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
}
/**
* create a tribble feature reader class, given the target class and the input file
* @param targetClass the target class, of a Tribble Codec type
* @param inputFile the input file, that corresponds to the feature type
* @return a pair of <BasicFeatureSource, SAMSequenceDictionary>
*/
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
return createFeatureReader(targetClass, "anonymous", inputFile);
}
/**
* create a feature reader of the specified type
* @param targetClass the target codec type
* @param name the target name
* @param inputFile the input file to create the track from (of the codec type)
* @return a pair of the feature reader and its sequence dictionary
*/
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
else
pair = getLinearFeatureReader(targetClass, name, inputFile);
return pair;
}
/**
* create a feature reader, without assuming there exists an index. This code assumes the feature
* reader of the appropriate type will figure out what the right index type is, and determine if it
* exists.
*
* @param targetClass the codec class type
* @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<BasicFeatureSource, SAMSequenceDictionary> createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<BasicFeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to create feature reader from file", e);
}
}
/**
* add a name to the codec, if it takes one
* @param targetClass the class to create a codec for
* @param name the name to assign this codec
* @return the feature codec itself
*/
private FeatureCodec createCodec(Class targetClass, String name) {
FeatureCodec codex = this.createByType(targetClass);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
return codex;
}
/**
* create a linear feature reader, where we create the index ahead of time
* @param targetClass the target class
* @param name the name of the codec
* @param inputFile the tribble file to parse
* @return a pair of the feature reader and the sequence dictionary derived from its index
*/
private Pair<BasicFeatureSource, SAMSequenceDictionary> getLinearFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> reader;
try {
Index index = loadIndex(inputFile, createCodec(targetClass, name), true);
reader = new Pair<BasicFeatureSource, SAMSequenceDictionary>(new BasicFeatureSource(inputFile.getAbsolutePath(),
index,
createCodec(targetClass, name)),
sequenceSetToDictionary(index.getSequenceNames()));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to create reader with file", e);
} catch (IOException e) {
throw new UserException("Unable to make the index file for " + inputFile, e);
}
return reader;
}
/**
* load an index for the input file, creating one if it is missing or out of date
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk write the index to disk?
* @return a linear index for the specified type
* @throws IOException if we cannot write the index file
*/
public synchronized static Index loadIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
// create the index file name, locking on the index file name
File indexFile = new File(inputFile.getAbsoluteFile() + indexExtension);
FSLockWithShared lock = new FSLockWithShared(indexFile);
// acquire a lock on the file
Index idx = null;
if (indexFile.canRead())
idx = attemptIndexFromDisk(inputFile, codec, indexFile, lock);
// if we managed to make an index, return
if (idx != null) return idx;
// we couldn't load an existing index, so fall through to creating a new one
return createNewIndex(inputFile, codec, onDisk, indexFile, lock);
}
/**
* attempt to read the index from disk
* @param inputFile the input file
* @param codec the codec to read from
* @param indexFile the index file itself
* @param lock the lock file
* @return an index, or null if we couldn't load one
* @throws IOException if we fail for FS issues
*/
protected static Index attemptIndexFromDisk(File inputFile, FeatureCodec codec, File indexFile, FSLockWithShared lock) throws IOException {
boolean locked;
try {
locked = lock.sharedLock();
}
catch(FileSystemInabilityToLockException ex) {
throw new UserException.MissortedFile(inputFile, "Unexpected inability to lock exception", ex);
}
Index idx;
try {
if (!locked) // can't lock file
idx = createIndexInMemory(inputFile, codec);
else
idx = loadFromDisk(inputFile, indexFile);
} finally {
if (locked) lock.unlock();
}
return idx;
}
/**
* load the index from disk, checking for out of date indexes and old versions (both of which are deleted)
* @param inputFile the input file
* @param indexFile the input file, plus the index extension
* @return an Index, or null if we're unable to load
*/
public static Index loadFromDisk(File inputFile, File indexFile) {
logger.info("Loading Tribble index from disk for file " + inputFile);
Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath());
// check if the file is up-to date (filestamp and version check)
if (index.isCurrentVersion() && indexFile.lastModified() > inputFile.lastModified())
return index;
else if (indexFile.lastModified() < inputFile.lastModified())
logger.warn("Index file " + indexFile + " is out of date (index older than input file), deleting and updating the index file");
else // we've loaded an old version of the index, we want to remove it <-- currently not used, but may re-enable
logger.warn("Index file " + indexFile + " is out of date (old version), deleting and updating the index file");
// however we got here, remove the index and return null
boolean deleted = indexFile.delete();
if (!deleted) logger.warn("Index file " + indexFile + " is out of date, but could not be removed; it will not be trusted (we'll try to rebuild an in-memory copy)");
return null;
}
/**
* attempt to create the index, and write it to disk
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk if they asked for disk storage or not
* @param indexFile the index file location
* @param lock the locking object
* @return the index object
* @throws IOException when unable to create the new index
*/
private static Index createNewIndex(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, FSLockWithShared lock) throws IOException {
Index index = createIndexInMemory(inputFile, codec);
boolean locked = false; // could we exclusively lock the file?
try {
locked = lock.exclusiveLock();
if (locked) {
logger.info("Writing Tribble index to disk for file " + inputFile);
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
index.write(stream);
stream.close();
}
else // we can't write it to disk, just store it in memory, tell them this
if (onDisk) logger.info("Unable to write to " + indexFile + " for the index file, creating index in memory only");
return index;
}
catch(FileSystemInabilityToLockException ex) {
throw new UserException.MissortedFile(inputFile,"Unexpected inability to lock exception", ex);
}
finally {
if (locked) lock.unlock();
}
}
/**
* create the index in memory, given the input file and feature codec
* @param inputFile the input file
* @param codec the codec
* @return a LinearIndex, given the file location
* @throws IOException when unable to create the index in memory
*/
private static Index createIndexInMemory(File inputFile, FeatureCodec codec) throws IOException {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
return IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
}
/**
* convert a list of Strings into a sequence dictionary
* @param contigList the contig list, in coordinate order; this is allowed to be null
* @return a SAMSequenceDictionary, WITHOUT contig sizes
*/
private static SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet<String> contigList) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
if (contigList == null) return dict;
for (String name : contigList) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
Set<String> names = new TreeSet<String>();
if (trackRecordType == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
/**
* find the associated reference meta data
*
* @param bindings the bindings of strings from the -B command line option
*
* @return a list of RMDTracks, one for each -B option
*/
public List<RMDTrack> getReferenceMetaDataSources(GenomeAnalysisEngine engine,List<String> bindings) {
initializeBindings(engine,bindings);
// try and make the tracks given their requests
return createRequestedTrackObjects();
}
/**
* initialize our lists of bindings
* @param engine The engine, used to populate tags.
* @param bindings the input to the GATK, as a list of strings passed in through the -B options
*/
private void initializeBindings(GenomeAnalysisEngine engine,List<String> bindings) {
// NOTE: This method behaves like a static initializer: once the inputs have been passed in, they are locked in.
if (inputs.size() > 0 || bindings.size() == 0)
return;
for (String binding: bindings) {
if(engine != null && engine.getTags(binding).size() == 2) {
// Assume that if tags are present, those tags are name and type.
// Name is always first, followed by type.
List<String> parameters = engine.getTags(binding);
String name = parameters.get(0);
String type = parameters.get(1);
inputs.add(new RMDTriplet(name,type,binding));
}
else {
// Otherwise, use old-format bindings.
String[] split = binding.split(",");
if (split.length != 3) throw new IllegalArgumentException(binding + " is not a valid reference metadata track description");
inputs.add(new RMDTriplet(split[0], split[1], split[2]));
}
}
}
/**
* create the requested track objects
*
* @return a list of the tracks, one for each of the requested input tracks
*/
private List<RMDTrack> createRequestedTrackObjects() {
// create live instances of the tracks
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
// create instances of each of the requested types
for (RMDTriplet trip : inputs) {
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(trip.getType().toUpperCase());
if (featureCodecClass == null)
throw new UserException.BadArgumentValue("-B",trip.getType());
tracks.add(createInstanceOfTrack(featureCodecClass, trip.getName(), new File(trip.getFile())));
}
return tracks;
}
}
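The index caching in loadIndex above is the subtlest part of this class: a readable ".idx" sibling is shared-locked and reused when its version and timestamp are current, rebuilt (and rewritten under an exclusive lock) when stale, and kept in memory only when locking fails. A minimal sketch of a caller, assuming RefSeqCodec has a no-argument constructor (as the plugin manager requires); the input path is hypothetical:

import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;

import java.io.File;
import java.io.IOException;

public class IndexLoadExample {
    public static void main(String[] args) throws IOException {
        File input = new File("refGene.txt"); // hypothetical input file
        // first call builds the index and, if refGene.txt.idx is writable,
        // persists it; later calls shared-lock and reuse it until the input
        // file becomes newer than the index or the index version goes stale
        Index index = RMDTrackBuilder.loadIndex(input, new RefSeqCodec(), true); // assumes a no-arg codec constructor
        System.out.println("indexed contigs: " + index.getSequenceNames());
    }
}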

View File

@ -1,107 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
/**
* @author aaron
* <p/>
* Class RODTrackBuilder
* <p/>
* the builder for tracks of the current ROD system, a holdover until Tribble supports binary and multi-line formats
*/
public class RODTrackBuilder implements RMDTrackBuilder {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
/**
* the bindings from track name to the ROD class we use
*/
private static HashMap<String, Class<? extends ReferenceOrderedDatum>> Types = new HashMap<String, Class<? extends ReferenceOrderedDatum>>();
static {
// All known ROD types
Types.put("GELI", rodGELI.class);
Types.put("Table", TabularROD.class);
Types.put("Intervals", IntervalRod.class);
Types.put("Plink", PlinkRod.class);
}
/**
* create a RMDTrack of the specified type
*
* @param targetClass the target class of track
* @param name what to call the track
* @param inputFile the input file
*
* @return an instance of the track
* @throws org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
//@Override
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
return new RODRMDTrack(targetClass, name, inputFile, createROD(name,targetClass,inputFile));
}
/** @return a map of all available track types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return new HashMap<String, Class>(Types);
}
/** @return a map of all available track record types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
return new HashMap<String, Class>(Types);
}
/**
* Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with
* <name>, of type <type> that reads its input from <file>.
*
* @param trackName the name of the track to create
* @param type the type of the track to create
* @param fileName the filename to create the track from
* @return a reference ordered data track
*/
public ReferenceOrderedData createROD(final String trackName, Class type, File fileName) {
// Create the ROD
ReferenceOrderedData<?> rod = new ReferenceOrderedData<ReferenceOrderedDatum>(trackName, fileName, type );
logger.info(String.format("Created binding from %s to %s of type %s", trackName, fileName, type));
return rod;
}
}

View File

@ -1,334 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.*;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.file.FSLockWithShared;
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.*;
import java.util.*;
/**
*
* @author aaron
*
* Class TribbleRMDTrackBuilder
*
* This class keeps track of the available codecs, and knows how to put together a track
* that gets its iterators from the FeatureReader using Tribble.
*
*/
public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(TribbleRMDTrackBuilder.class);
// the linear index extension
public static final String indexExtension = ".idx";
/** Create a new plugin manager. */
public TribbleRMDTrackBuilder() {
super(FeatureCodec.class, "Codecs", "Codec");
}
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return new HashMap<String, Class>(this.pluginsByName);
}
/** @return a list of all available track record types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
Map<String, Class> classes = new HashMap<String, Class>();
for (String name: this.pluginsByName.keySet()) {
FeatureCodec codec = this.createByName(name);
classes.put(name, codec.getFeatureType());
}
return classes;
}
/**
* create a RMDTrack of the specified type
*
* @param targetClass the target class of track
* @param name what to call the track
* @param inputFile the input file
*
* @return an instance of the track
* @throws RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
// return a feature reader track
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new TribbleTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
}
/**
* create a tribble feature reader class, given the target class and the input file
* @param targetClass the target class, of a Tribble Codec type
* @param inputFile the input file, that corresponds to the feature type
* @return a pair of <BasicFeatureSource, SAMSequenceDictionary>
*/
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
return createFeatureReader(targetClass, "anonymous", inputFile);
}
/**
* create a feature reader of the specified type
* @param targetClass the target codec type
* @param name the target name
* @param inputFile the input file to create the track from (of the codec type)
* @return a pair of the feature reader and its sequence dictionary
*/
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
else
pair = getLinearFeatureReader(targetClass, name, inputFile);
return pair;
}
/**
* create a feature reader, without assuming there exists an index. This code assumes the feature
* reader of the appropriate type will figure out what the right index type is, and determine if it
* exists.
*
* @param targetClass the codec class type
* @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<BasicFeatureSource, SAMSequenceDictionary> createBasicFeatureSourceNoAssumedIndex(Class targetClass, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<BasicFeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to create feature reader from file", e);
}
}
/**
* add a name to the codec, if it takes one
* @param targetClass the class to create a codec for
* @param name the name to assign this codec
* @return the feature codec itself
*/
private FeatureCodec createCodec(Class targetClass, String name) {
FeatureCodec codex = this.createByType(targetClass);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
return codex;
}
/**
* create a linear feature reader, where we create the index ahead of time
* @param targetClass the target class
* @param name the name of the codec
* @param inputFile the tribble file to parse
* @return a pair of the feature reader and the sequence dictionary derived from its index
*/
private Pair<BasicFeatureSource, SAMSequenceDictionary> getLinearFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> reader;
try {
Index index = loadIndex(inputFile, createCodec(targetClass, name), true);
reader = new Pair<BasicFeatureSource, SAMSequenceDictionary>(new BasicFeatureSource(inputFile.getAbsolutePath(),
index,
createCodec(targetClass, name)),
sequenceSetToDictionary(index.getSequenceNames()));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(inputFile, "Unable to create reader with file", e);
} catch (IOException e) {
throw new UserException("Unable to make the index file for " + inputFile, e);
}
return reader;
}
/**
* load an index for the input file, creating one if it is missing or out of date
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk write the index to disk?
* @return a linear index for the specified type
* @throws IOException if we cannot write the index file
*/
public synchronized static Index loadIndex(File inputFile, FeatureCodec codec, boolean onDisk) throws IOException {
// create the index file name, locking on the index file name
File indexFile = new File(inputFile.getAbsoluteFile() + indexExtension);
FSLockWithShared lock = new FSLockWithShared(indexFile);
// acquire a lock on the file
Index idx = null;
if (indexFile.canRead())
idx = attemptIndexFromDisk(inputFile, codec, indexFile, lock);
// if we managed to make an index, return
if (idx != null) return idx;
// we couldn't load an existing index, so fall through to creating a new one
return createNewIndex(inputFile, codec, onDisk, indexFile, lock);
}
/**
* attempt to read the index from disk
* @param inputFile the input file
* @param codec the codec to read from
* @param indexFile the index file itself
* @param lock the lock file
* @return an index, or null if we couldn't load one
* @throws IOException if we fail for FS issues
*/
protected static Index attemptIndexFromDisk(File inputFile, FeatureCodec codec, File indexFile, FSLockWithShared lock) throws IOException {
boolean locked;
try {
locked = lock.sharedLock();
}
catch(FileSystemInabilityToLockException ex) {
throw new ReviewedStingException("Unexpected inability to lock exception", ex);
}
Index idx;
try {
if (!locked) // can't lock file
idx = createIndexInMemory(inputFile, codec);
else
idx = loadFromDisk(inputFile, indexFile);
} finally {
if (locked) lock.unlock();
}
return idx;
}
/**
* load the index from disk, checking for out of date indexes and old versions (both of which are deleted)
* @param inputFile the input file
* @param indexFile the input file, plus the index extension
* @return an Index, or null if we're unable to load
*/
public static Index loadFromDisk(File inputFile, File indexFile) {
logger.info("Loading Tribble index from disk for file " + inputFile);
Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath());
// check if the file is up-to date (filestamp and version check)
if (index.isCurrentVersion() && indexFile.lastModified() > inputFile.lastModified())
return index;
else if (indexFile.lastModified() < inputFile.lastModified())
logger.warn("Index file " + indexFile + " is out of date (index older than input file), deleting and updating the index file");
else // we've loaded an old version of the index, we want to remove it <-- currently not used, but may re-enable
logger.warn("Index file " + indexFile + " is out of date (old version), deleting and updating the index file");
// however we got here, remove the index and return null
boolean deleted = indexFile.delete();
if (!deleted) logger.warn("Index file " + indexFile + " is out of date, but could not be removed; it will not be trusted (we'll try to rebuild an in-memory copy)");
return null;
}
/**
* attempt to create the index, and write it to disk
* @param inputFile the input file
* @param codec the codec to use
* @param onDisk if they asked for disk storage or not
* @param indexFile the index file location
* @param lock the locking object
* @return the index object
* @throws IOException when unable to create the new index
*/
private static Index createNewIndex(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, FSLockWithShared lock) throws IOException {
Index index = createIndexInMemory(inputFile, codec);
boolean locked = false; // could we exclusively lock the file?
try {
locked = lock.exclusiveLock();
if (locked) {
logger.info("Writing Tribble index to disk for file " + inputFile);
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
index.write(stream);
stream.close();
}
else // we can't write it to disk, just store it in memory, tell them this
if (onDisk) logger.info("Unable to write to " + indexFile + " for the index file, creating index in memory only");
return index;
}
catch(FileSystemInabilityToLockException ex) {
throw new ReviewedStingException("Unexpected inability to lock exception", ex);
}
finally {
if (locked) lock.unlock();
}
}
/**
* create the index in memory, given the input file and feature codec
* @param inputFile the input file
* @param codec the codec
* @return a LinearIndex, given the file location
* @throws IOException when unable to create the index in memory
*/
private static Index createIndexInMemory(File inputFile, FeatureCodec codec) throws IOException {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
return IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
}
/**
* convert a list of Strings into a sequence dictionary
* @param contigList the contig list, in coordinate order; this is allowed to be null
* @return a SAMSequenceDictionary, WITHOUT contig sizes
*/
private static SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet<String> contigList) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
if (contigList == null) return dict;
for (String name : contigList) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
}

View File

@ -1,42 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeaderLineType;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
public class Alignability implements InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, StratifiedAlignmentContext> stratifiedContexts,
VariantContext vc)
{
TabularROD record = tracker.lookup("alignability",TabularROD.class);
if (record == null)
return null;
if (record.get("alignability") == null)
throw new RuntimeException("ERROR: alignability column not defined in alignability input.\n");
int value = Integer.parseInt(record.get("alignability"));
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", value));
return map;
}
public List<String> getKeyNames() { return Arrays.asList("Alignability"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Alignability according to a mask file (3 is best, 0 is worst)")); }
}

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
@ -399,7 +399,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
}
private LocationAwareSeekableRODIterator initializeRefSeq() {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder();
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,refSeqGeneList).first;
try {
return new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
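The hunk above is truncated mid-method, and the same builder-plus-wrapper pattern recurs in the walkers below, so here is a self-contained sketch of it. The catch clause is an assumption, since this diff does not show how the walker handles the IOException:

import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.utils.exceptions.UserException;

import java.io.File;
import java.io.IOException;

public class RefSeqIteratorExample {
    public static LocationAwareSeekableRODIterator open(File geneList) {
        RMDTrackBuilder builder = new RMDTrackBuilder();
        FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class, geneList).first;
        try {
            // wrap the Tribble iterator so the legacy ROD machinery can seek over it
            return new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(), "refseq"));
        } catch (IOException e) {
            // assumed error handling; the original catch block is outside this hunk
            throw new UserException.CouldNotReadInputFile(geneList, "Unable to read RefSeq annotations", e);
        }
    }
}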

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
@ -216,7 +216,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
if ( RefseqFileName != null ) {
logger.info("Using RefSeq annotations from "+RefseqFileName);
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder();
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {

View File

@ -31,11 +31,9 @@ import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
@ -88,14 +86,13 @@ public class PickSequenomProbes extends RodWalker<String, String> {
logger.info("Loading SNP mask... ");
ReferenceOrderedData snp_mask;
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder();
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator();
snpMaskIterator = new SeekableRODIterator(iter);
} else {
snp_mask = new ReferenceOrderedData<TabularROD>("snp_mask",
new java.io.File(SNP_MASK), TabularROD.class);
snpMaskIterator = new SeekableRODIterator(new GATKFeatureIterator(snp_mask.iterator()));
// TODO: fix me when Plink is back
throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)");
}
}

View File

@ -32,7 +32,6 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.PlinkRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*;
@ -208,10 +207,6 @@ public class SequenomValidationConverter extends RodWalker<Pair<VariantContext,
infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
// set the id if it's a plink rod
if ( rod instanceof PlinkRod )
infoMap.put(VariantContext.ID_KEY, ((PlinkRod)rod).getVariantName());
vContext = VariantContextUtils.modifyAttributes(vContext, infoMap);
return new Pair<VariantContext, Byte>(vContext, ref.getBase());

View File

@ -9,7 +9,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
@ -47,7 +47,7 @@ public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
public void initialize() {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder();
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile).first;
}

View File

@ -47,7 +47,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
if ( intervalsList != null && intervalsList.size() > 0 ) {
for ( Object interval : intervalsList ) {
GenomeLoc loc = ((IntervalRod) interval).getLocation();
GenomeLoc loc = ((GATKFeature)interval).getLocation();
if ( ! intervalBuffer.keySet().contains(loc) ) {
intervalBuffer.put(loc,new IntervalInfoBuilder());
}

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.RefWalker;

View File

@ -11,7 +11,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -36,7 +36,7 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
public void initialize() {
if ( RefseqFileName != null ) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
RMDTrackBuilder builder = new RMDTrackBuilder();
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
try {
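The hunk is cut off at the try; a hedged guess at the continuation, mirroring the identical pattern in HybSelPerformanceWalker.initialize() later in this commit:

//     refseqIterator = new SeekableRODIterator(
//             new FeatureToGATKFeatureIterator(refseq.iterator(), "refseq"));
// } catch (IOException e) {
//     throw new UserException.CouldNotReadInputFile(RefseqFileName, e);
// }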

View File

@ -5,7 +5,6 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;

View File

@ -1,192 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.hybridselection;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.IntervalRod;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.PrintStream;
/**
* Accumulates coverage across hybrid selection bait intervals to assess the effect of bait adjacency and overlap on coverage
*/
@By(DataSource.REFERENCE)
public class CoverageAcrossBaitsWalker extends LocusWalker<Pair<Integer, Integer>, CoverageAcrossBaitsWalker.Coverage> {
@Output
public PrintStream out;
/* Accumulates coverage across hybrid selection bait intervals to assess effect of bait adjacency.
Requires input bait intervals that have an overhang beyond the actual bait interval to capture coverage data at these points.
Outputs an R-parseable file that has all the data in vectors and then does some basic plotting.
*/
@Argument(fullName="include_duplicates", shortName="idup", required=false, doc="consider duplicate reads")
public boolean INCLUDE_DUPLICATE_READS = false;
@Argument(fullName="min_mapq", shortName="mmq", required=false, doc="Minimum mapping quality of reads to consider")
public Integer MIN_MAPQ = 1;
@Argument(fullName="min_len", shortName="min", required=false, doc="Minimum length of interval to consider")
public Integer MIN_LEN = Integer.MIN_VALUE;
@Argument(fullName="max_len", shortName="max", required=false, doc="Maximum length of interval to consider")
public Integer MAX_LEN = 5000;
@Argument(fullName="max_bait_count", shortName="mbc", required=false, doc="Maximum number of baits to consider")
public Integer MAX_BAIT_COUNT = 24;
@Argument(fullName="free_standing_distance", shortName="fsd", required=false, doc="minimum distance to next interval to consider freestanding")
public Integer FREE_STANDING_DISTANCE = 500;
public static class Coverage {
// Container for carrying positive and negative strand coverage
public ArrayList<Integer> pos = new ArrayList<Integer>();
public ArrayList<Integer> neg = new ArrayList<Integer>();
public void addCoveragePoints(Pair<Integer,Integer> pn) {
pos.add(pn.first);
neg.add(pn.second);
}
}
public Pair<Integer, Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
List<SAMRecord> reads = context.getReads();
IntervalRod intervalROD = tracker.lookup("interval",IntervalRod.class);
GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation();
if (interval == null) { throw new ReviewedStingException("No intervals at locus; should not happen"); }
int offset = (int)(context.getPosition() - interval.getStart());
int depth[] = new int[2];
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
if (read.getNotPrimaryAlignmentFlag()) { continue; }
if (read.getReadUnmappedFlag()) { continue; }
if (!INCLUDE_DUPLICATE_READS && read.getDuplicateReadFlag()) { continue; }
if (read.getMappingQuality() < MIN_MAPQ) { continue; }
depth[read.getReadNegativeStrandFlag() ? 0 : 1]++;
}
return new Pair<Integer,Integer>(depth[0],depth[1]);
}
public Coverage reduceInit() { return new Coverage(); }
public Coverage reduce(Pair<Integer, Integer> depths, Coverage cov) {
cov.addCoveragePoints(depths);
return cov;
}
/**
* Return true if your walker wants to reduce each interval separately. Default is false.
* If you set this flag, several things will happen.
* The system will invoke reduceInit() once for each interval being processed, starting a fresh reduce
* Reduce will accumulate normally at each map unit in the interval
* However, onTraversalDone(reduce) will be called after each interval is processed.
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done,
* which is overloaded here to call onTraversalDone(reduce) for each location
*/
public boolean isReduceByInterval() {
return true;
}
public void onTraversalDone(List<Pair<GenomeLoc, Coverage>> results) {
int TOTAL_BAIT_WINDOW = MAX_BAIT_COUNT*120+FREE_STANDING_DISTANCE*2;
int depths[][][] = new int[2][MAX_BAIT_COUNT+1][TOTAL_BAIT_WINDOW]; // Indices: pos/neg strand count, bait count, position in bait
int bait_count[] = new int[MAX_BAIT_COUNT+1];
for (Pair<GenomeLoc, Coverage> pair : results) {
GenomeLoc loc = pair.first;
//if (loc.size() < MIN_LEN || loc.size() > MAX_LEN) { continue; }
long interval_width = loc.size() - FREE_STANDING_DISTANCE * 2;
int baits = (int)(interval_width / 120);
//out.format("#Interval_width: %d Bait_count: %d\n", interval_width, baits);
if (interval_width % 120 != 0) { continue; }
if (interval_width > MAX_BAIT_COUNT*120) { continue; }
// This target is good; count it
bait_count[baits]++;
Coverage pn_cov = pair.second;
for (int pn=0; pn<2; pn++) {
ArrayList<Integer> cov = pn == 0 ? pn_cov.neg : pn_cov.pos;
for (int i=0; i<cov.size(); i++) {
depths[pn][baits][i] += cov.get(i);
}
}
}
// Write out the R script that plots all of the coverage across intervals
// All of the data is contained in variables b<s><n>, where <s> is the strand
// ('p' for positive, 'n' for negative) and <n> is the number of baits per target
boolean plot_begun = false;
out.format("pcounts <- NULL\n");
out.format("ncounts <- NULL\n");
for (int baits=MAX_BAIT_COUNT; baits>0; baits--) {
int norm_depth_width = baits*120+FREE_STANDING_DISTANCE*2;
float norm_depths[] = new float[norm_depth_width];
for (int pn=0; pn<2; pn++) {
for (int i=0; i<norm_depth_width; i++) {
norm_depths[i] = (float)depths[pn][baits][i]/bait_count[baits];
}
//out.format("Baits/target: %s\n", baits);
//out.format("Depths: %s\n", Arrays.toString(norm_depths));
//out.format("Number of baits: %d\n", bait_count[baits]);
char pn_char = pn == 0 ? 'n' : 'p';
String pn_color = pn == 0 ? "black" : "red";
if (bait_count[baits] > 0) {
String depth_str = Arrays.toString(norm_depths);
depth_str = depth_str.substring(1,depth_str.length()-1);
out.format("b%c%d <- c(%s)\n", pn_char, baits, depth_str);
if (plot_begun) {
out.format("points(b%c%d, type='l', col=\"%s\")\n", pn_char, baits, pn_color);
}else{
out.format("plot(b%c%d, type='l', col=\"%s\", xlab='Position from start of 1st bait', ylab='Coverage')\n", pn_char, baits, pn_color);
plot_begun = true;
}
}
out.format("%ccounts <- c(%ccounts,%d)\n", pn_char, pn_char, bait_count[baits]);
}
}
}
}

View File

@ -1,320 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.hybridselection;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.picard.util.OverlapDetector;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broad.tribble.FeatureSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.*;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
/**
* Given intervals corresponding to targets or baits in a hybrid selection experiment, this walker gives the following interval-by-interval data:
* coverage, %GC, corresponding gene name, bait quantity, number of adjacent baits, and whether the bait is boosted.
* When using this walker it can be useful to disable the GATK's merging of intervals; that option is not yet part of the GATK.
*/
@By(DataSource.REFERENCE)
public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerformanceWalker.TargetInfo> implements TreeReducible<HybSelPerformanceWalker.TargetInfo> {
@Output
public PrintStream out;
@Argument(fullName="min_mapq", shortName="mmq", required=false, doc="Minimum mapping quality of reads to consider")
public Integer MIN_MAPQ = 1;
@Argument(fullName="min_baseq", shortName="mbq", required=false, doc="Minimum base call quality of reads to consider (default: 0)")
public Integer MIN_BASEQ = 0;
@Argument(fullName="include_duplicates", shortName="idup", required=false, doc="consider duplicate reads")
public boolean INCLUDE_DUPLICATE_READS = false;
@Argument(fullName="free_standing_distance", shortName="fsd", required=false, doc="minimum distance to next interval to consider freestanding")
public Integer FREE_STANDING_DISTANCE = 500;
@Argument(fullName="booster", required=false, doc="interval list of booster baits")
public File BOOSTER_FILE;
@Argument(fullName="booster_distance", required=false, doc="distance up to which a booster can affect a target")
public Integer BOOSTER_DISTANCE = 100; // how far away can a booster be to "hit" its target?
@Argument(fullName="bait_quantity", required=false, doc="interval list of baits with quantity/concentration (obtainied via sequencing, Nanostring)")
public File BAIT_QUANT_FILE;
@Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, intervals will be specified with gene names", required=false)
String REFSEQ_FILE = null;
private LocationAwareSeekableRODIterator refseqIterator=null;
public static class TargetInfo {
public int counts = 0;
// did at least two reads hit this target
public boolean hitTwice = false;
public int positionsOver2x = 0;
public int positionsOver8x = 0;
public int positionsOver10x = 0;
public int positionsOver20x = 0;
public int positionsOver30x = 0;
}
// @Argument(fullName="suppressLocusPrinting",required=false,defaultValue="false")
// public boolean suppressPrinting;
@Override
public void initialize() {
if ( REFSEQ_FILE != null ) {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class, new File(REFSEQ_FILE)).first;
try {
refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(), "refseq"));
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(REFSEQ_FILE, e);
}
logger.info("Using RefSeq annotations from "+REFSEQ_FILE);
}
if ( refseqIterator == null ) logger.info("No annotations available");
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
int depth = 0;
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
if (read.getNotPrimaryAlignmentFlag()) { continue; }
if (read.getReadUnmappedFlag()) { continue; }
if (!INCLUDE_DUPLICATE_READS && read.getDuplicateReadFlag()) { continue; }
if (read.getMappingQuality() < MIN_MAPQ) { continue; }
if (read.getBaseQualities()[offsets.get(i)] < MIN_BASEQ) { continue; }
depth++;
}
return depth;
}
/**
* Return true if your walker wants to reduce each interval separately. Default is false.
*
* If you set this flag, several things will happen.
*
* The system will invoke reduceInit() once for each interval being processed, starting a fresh reduce
* Reduce will accumulate normally at each map unit in the interval
* However, onTraversalDone(reduce) will be called after each interval is processed.
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done,
* which is overloaded here to call onTraversalDone(reduce) for each location
*/
public boolean isReduceByInterval() {
return true;
}
public TargetInfo reduceInit() { return new TargetInfo(); }
public TargetInfo reduce(Integer depth, TargetInfo sum) {
sum.counts += depth;
if (depth >= 2) { sum.hitTwice = true; sum.positionsOver2x++;}
if (depth >= 8) { sum.positionsOver8x++; }
if (depth >= 10) { sum.positionsOver10x++; }
if (depth >= 20) { sum.positionsOver20x++; }
if (depth >= 30) { sum.positionsOver30x++; }
return sum;
}
public TargetInfo treeReduce(TargetInfo lhs, TargetInfo rhs) {
return lhs; // dummy function to test if tree reduce by interval is working
}
public void onTraversalDone(TargetInfo result) {
}
@Override
public void onTraversalDone(List<Pair<GenomeLoc, TargetInfo>> results) {
out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted\tbases_over_2x\tbases_over_8x\tbases_over_10x\tbases_over_20x\tbases_over_30x\tgene_name\tbait_quantity\tadjacent_baits");
// first zip through and build an overlap detector of all the intervals, so later
// we can calculate if this interval is free-standing
OverlapDetector<Interval> od = new OverlapDetector<Interval>(-FREE_STANDING_DISTANCE,0);
for(Pair<GenomeLoc, TargetInfo> pair : results) {
GenomeLoc target = pair.getFirst();
Interval interval = makeInterval(target);
od.addLhs(interval, interval);
}
OverlapDetector<Interval> booster = new OverlapDetector<Interval>(-BOOSTER_DISTANCE,0);
if (BOOSTER_FILE != null) {
IntervalList il = IntervalList.fromFile(BOOSTER_FILE);
List<Interval> l = il.getUniqueIntervals();
booster.addAll(l, l);
}
// Create an interval detector that will give us the bait quantity specified in
// an optional bait quantity file
OverlapDetector<Interval> bait_quant = new OverlapDetector<Interval>(0,0);
if (BAIT_QUANT_FILE != null) {
IntervalList il = IntervalList.fromFile(BAIT_QUANT_FILE);
List<Interval> l = il.getIntervals();
bait_quant.addAll(l, l);
}
// Create a detector of adjacent baits
OverlapDetector<Interval> adjacent_bait_detector = new OverlapDetector<Interval>(-1,-1);
for(Pair<GenomeLoc, TargetInfo> pair : results) {
GenomeLoc target = pair.getFirst();
Interval interval = makeInterval(target);
adjacent_bait_detector.addLhs(interval, interval);
}
// now zip through and calculate the total average coverage
long totalCoverage = 0;
long basesConsidered = 0;
for(Pair<GenomeLoc, TargetInfo> pair : results) {
GenomeLoc target = pair.getFirst();
TargetInfo ti = pair.getSecond();
// as long as it was hit twice, count it
if(ti.hitTwice) {
long length = target.getStop() - target.getStart() + 1;
totalCoverage += ti.counts;
basesConsidered += length;
}
}
double meanTargetCoverage = (1.0*totalCoverage) / basesConsidered;
for(Pair<GenomeLoc, TargetInfo> pair : results) {
GenomeLoc target = pair.getFirst();
TargetInfo ti = pair.getSecond();
long length = target.getStop() - target.getStart() + 1;
double avgCoverage = ((double)ti.counts / (double)length);
double normCoverage = avgCoverage / meanTargetCoverage;
// calculate gc for the target
double gc = calculateGC(target);
// if there is more than one hit on the overlap detector, it's not freestanding
Interval targetInterval = makeInterval(target);
Collection<Interval> hits = od.getOverlaps(targetInterval);
boolean freestanding = (hits.size() == 1);
boolean boosted = (booster.getOverlaps(targetInterval).size() > 0);
// look up the gene name info
String geneName = getGeneName(target);
Collection<Interval> bait_quant_hits = bait_quant.getOverlaps(targetInterval);
String bait_quant_string = (bait_quant_hits.size() == 1) ? bait_quant_hits.iterator().next().getName() : "0";
if (bait_quant_hits.size() > 1) { out.printf("Warning: multiple bait quantity intervals detected; perhaps bait quantity interval lengths don't match primary interval list specified with -L\n"); }
int adjacent_baits = adjacent_bait_detector.getOverlaps(targetInterval).size() - 1;
out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t%d\n",
target.getContig(), target.getStart(), target.getStop(), length, gc,
avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0),
ti.positionsOver2x,
ti.positionsOver8x,
ti.positionsOver10x,
ti.positionsOver20x,
ti.positionsOver30x,
geneName,
bait_quant_string,
adjacent_baits
);
}
}
private Interval makeInterval(GenomeLoc target) {
return new Interval(target.getContig(), (int) target.getStart(), (int) target.getStop());
}
private String getGeneName(GenomeLoc target) {
if (refseqIterator == null) { return "UNKNOWN"; }
RODRecordList annotationList = refseqIterator.seekForward(target);
if (annotationList == null) { return "UNKNOWN"; }
for(GATKFeature rec : annotationList) {
if ( ((RefSeqFeature)rec.getUnderlyingObject()).overlapsExonP(target) ) {
return ((RefSeqFeature)rec.getUnderlyingObject()).getGeneName();
}
}
return "UNKNOWN";
}
IndexedFastaSequenceFile seqFile = null;
private double calculateGC(GenomeLoc target) {
if (seqFile == null) {
seqFile = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
}
ReferenceSequence refSeq = seqFile.getSubsequenceAt(target.getContig(),target.getStart(), target.getStop());
int gcCount = 0;
for(char base : StringUtil.bytesToString(refSeq.getBases()).toCharArray()) {
if (base == 'C' || base == 'c' || base == 'G' || base == 'g') { gcCount++; }
}
return ( (double) gcCount ) / ((double) refSeq.getBases().length);
}
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.playground.utils.report.utils;
import org.broadinstitute.sting.utils.Utils;
import java.util.*;

View File

@ -1,9 +1,7 @@
package org.broadinstitute.sting.playground.utils.report.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -65,7 +65,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
WalkerManager walkerManager = new WalkerManager();
FilterManager filterManager = new FilterManager();
RMDTrackManager rmdTrackManager = new RMDTrackManager();
RMDTrackBuilder trackBuilder = new RMDTrackBuilder();
/**
* Required main method implementation.
@ -113,7 +113,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(walkerType));
argumentFields.addAll(RodBindField.getRodArguments(walkerType, rmdTrackManager));
argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder));
argumentFields.addAll(ReadFilterField.getFilterArguments(walkerType));
writeClass(COMMANDLINE_PACKAGE_NAME + "." + clpClassName, WALKER_PACKAGE_NAME,

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.queue.extensions.gatk;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -88,7 +88,7 @@ public class RodBindField extends ArgumentField {
return exclusiveOf.toString();
}
public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackManager rmdTrackManager) {
public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackBuilder trackBuilder) {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
List<RMD> requires = WalkerManager.getRequiredMetaData(walkerClass);
@ -101,7 +101,7 @@ public class RodBindField extends ArgumentField {
// TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
//fields.add(new RodBindArgumentField(argumentDefinition, true));
} else {
for (String typeName: rmdTrackManager.getTrackRecordTypeNames(required.type()))
for (String typeName: trackBuilder.getTrackRecordTypeNames(required.type()))
fields.add(new RodBindField(trackName, typeName, fields, true));
}
argumentFields.addAll(fields);
@ -114,7 +114,7 @@ public class RodBindField extends ArgumentField {
// TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
//fields.add(new RodBindArgumentField(argumentDefinition, false));
} else {
for (String typeName: rmdTrackManager.getTrackRecordTypeNames(allowed.type()))
for (String typeName: trackBuilder.getTrackRecordTypeNames(allowed.type()))
fields.add(new RodBindField(trackName, typeName, fields, true));
}
argumentFields.addAll(fields);

View File

@ -1,14 +1,15 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.Assert;
import org.junit.BeforeClass;
@ -43,6 +44,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
*/
private static IndexedFastaSequenceFile seq;
/**
* our track builder
*/
RMDTrackBuilder builder = new RMDTrackBuilder();
@BeforeClass
public static void init() throws FileNotFoundException {
// sequence
@ -69,8 +75,8 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
@Test
public void testSingleBinding() {
File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null, new RODRMDTrack(TabularROD.class,"tableTest",file,rod));
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(null,track);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
@ -78,7 +84,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
TabularROD datum = tracker.lookup("tableTest",TabularROD.class);
TableFeature datum = tracker.lookup("tableTest",TableFeature.class);
Assert.assertEquals("datum parameter for COL1 is incorrect", "C", datum.get("COL1"));
Assert.assertEquals("datum parameter for COL2 is incorrect", "D", datum.get("COL2"));
@ -92,10 +98,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
public void testMultipleBinding() {
File file = new File(testDir + "TabularDataTest.dat");
ReferenceOrderedData rod1 = new ReferenceOrderedData("tableTest1", file, TabularROD.class);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,new RODRMDTrack(TabularROD.class,"tableTest1",file,rod1));
ReferenceOrderedData rod2 = new ReferenceOrderedData("tableTest2", file, TabularROD.class);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,new RODRMDTrack(TabularROD.class,"tableTest2",file,rod2));
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file);
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(null,track);
RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file);
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(null,track2);
Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30)));
@ -104,13 +111,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
TabularROD datum1 = tracker.lookup("tableTest1",TabularROD.class);
TableFeature datum1 = tracker.lookup("tableTest1",TableFeature.class);
Assert.assertEquals("datum1 parameter for COL1 is incorrect", "C", datum1.get("COL1"));
Assert.assertEquals("datum1 parameter for COL2 is incorrect", "D", datum1.get("COL2"));
Assert.assertEquals("datum1 parameter for COL3 is incorrect", "E", datum1.get("COL3"));
TabularROD datum2 = tracker.lookup("tableTest2",TabularROD.class);
TableFeature datum2 = tracker.lookup("tableTest2", TableFeature.class);
Assert.assertEquals("datum2 parameter for COL1 is incorrect", "C", datum2.get("COL1"));
Assert.assertEquals("datum2 parameter for COL2 is incorrect", "D", datum2.get("COL2"));

View File

@ -2,9 +2,10 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.refdata.features.table.TableCodec;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -47,13 +48,13 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
public static void init() throws FileNotFoundException {
File sequenceFile = new File(hg18Reference);
GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX);
}
@Before
public void setUp() {
File file = new File(testDir + "TabularDataTest.dat");
rod = new RODRMDTrack(TabularROD.class, "tableTest", file, new ReferenceOrderedData("tableTest", file, TabularROD.class));
RMDTrackBuilder builder = new RMDTrackBuilder();
rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file);
}
@Test
@ -64,7 +65,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.next().get(0).getUnderlyingObject();
TableFeature datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
@ -90,26 +91,26 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
// Test out-of-order access: first iterator2, then iterator1.
// Ugh...first call to a region needs to be a seek.
TabularROD datum = (TabularROD)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
TableFeature datum = (TableFeature)iterator2.seekForward(testSite2).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C"));
assertTrue(datum.get("COL2").equals("D"));
assertTrue(datum.get("COL3").equals("E"));
datum = (TabularROD)iterator1.next().get(0).getUnderlyingObject();
datum = (TableFeature)iterator1.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
assertTrue(datum.get("COL3").equals("C"));
// Advance iterator2, and make sure both iterator's contents are still correct.
datum = (TabularROD)iterator2.next().get(0).getUnderlyingObject();
datum = (TableFeature)iterator2.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
assertTrue(datum.get("COL3").equals("H"));
datum = (TabularROD)iterator1.next().get(0).getUnderlyingObject();
datum = (TableFeature)iterator1.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite2));
assertTrue(datum.get("COL1").equals("C"));
assertTrue(datum.get("COL2").equals("D"));
@ -135,7 +136,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.next().get(0).getUnderlyingObject();
TableFeature datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));
@ -150,7 +151,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
datum = (TabularROD)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
@ -170,7 +171,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 1, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 0, iteratorPool.numAvailableIterators());
TabularROD datum = (TabularROD)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
TableFeature datum = (TableFeature)iterator.seekForward(testSite3).get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite3));
assertTrue(datum.get("COL1").equals("F"));
assertTrue(datum.get("COL2").equals("G"));
@ -185,7 +186,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
Assert.assertEquals("Number of iterators in the pool is incorrect", 2, iteratorPool.numIterators());
Assert.assertEquals("Number of available iterators in the pool is incorrect", 1, iteratorPool.numAvailableIterators());
datum = (TabularROD)iterator.next().get(0).getUnderlyingObject();
datum = (TableFeature)iterator.next().get(0).getUnderlyingObject();
assertTrue(datum.getLocation().equals(testSite1));
assertTrue(datum.get("COL1").equals("A"));
assertTrue(datum.get("COL2").equals("B"));

View File

@ -1,247 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
/*
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.Assert;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 22, 2010
* Time: 11:27:33 PM
* To change this template use File | Settings | File Templates.
*
public class PlinkRodUnitTest extends BaseTest {
// todo :: get the isIndel() isInsertion() and isDeletion() tests working again -- this may require new
// todo :: methods in the objects themselves
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(b36KGReference));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
public BufferedReader openFile(String filename) {
try {
return new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException e) {
throw new StingException("Couldn't open file " + filename);
}
}
@Test
public void testStandardPedFile() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
} catch ( FileNotFoundException e ) {
throw new StingException("test file for testStandardPedFile() does not exist",e);
}
// test that the sample names are correct
List<String> rodNames = rod.getVariantSampleNames();
List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
boolean namesCorrect = true;
for ( int i = 0; i < expectedNames.size(); i++ ) {
namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
}
Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
// test that rod can be iterated over
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
} while ( rod.parseLine(null,null) );
Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
ArrayList<Genotype> snp1 = genotypesInRod.get(0);
ArrayList<Genotype> snp3 = genotypesInRod.get(2);
Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
String alleleStr1 = new String(snp1_individual3_alleles.get(0).getBases())+new String(snp1_individual3_alleles.get(1).getBases());
String alleleStr2 = new String(snp3_individual2_alleles.get(0).getBases())+new String(snp3_individual2_alleles.get(1).getBases());
Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
boolean name2isSame = true;
for ( ArrayList<String> names : sampleNamesInRod ) {
name2isSame = name2isSame && names.get(1).equals("NAMELY");
}
Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
// test that the loci are correctly parsed and in order
List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
boolean lociCorrect = true;
for ( int i = 0; i < 3; i ++ ) {
lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
}
}
@Test
public void testStandardPedFileWithIndels() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
} catch ( FileNotFoundException e) {
throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
}
// Iterate through the rod
List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
snpSites.add(rod.variantIsSNP());
} while ( rod.parseLine(null,null) );
boolean snpOrder = true;
List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
for ( int i = 0; i < 4; i ++ ) {
snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
}
Assert.assertTrue("That the variant type order is as expected", snpOrder);
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1). );
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the snp bases are correct","GC",new String(genotypesInRod.get(2).get(2).getAlleles().get(0).getBases())+new String(genotypesInRod.get(2).get(2).getAlleles().get(1).getBases()));
}
@Test
public void testBinaryPedFileNoIndels() {
PlinkRod rod = new PlinkRod("binaryTest1");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
// iterate through the ROD and get stuff
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
do {
lociInRod.add(rod.getLocation());
genotypesInRod.add(rod.getGenotypes());
samplesInRod.add(rod.getVariantSampleNames());
} while ( rod.parseLine(null,null) );
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
for ( int i = 0; i < expecLoc.size(); i ++ ) {
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
}
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
false,false,false,true,false,false,true,true,false,false);
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000");
int snpNo = 1;
int indiv = 1;
int alleleOffset = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases());
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
indiv++;
alleleOffset++;
}
indiv = 1;
snpNo++;
}
}
@Test
public void testIndelBinary() {
PlinkRod rod = new PlinkRod("binaryTest2");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
do {
genotypesInRod.add(rod.getGenotypes());
} while ( rod.parseLine(null,null) );
List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
"TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
false,false,false,false,false,false,true,false,true,false,false,false);
List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
false,false,false,false,false,false,false,false,false,false,false,false);
int al = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases());
Allele firstAl = gen.getAlleles().get(0);
Allele secondAl = gen.getAlleles().get(1);
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
al++;
}
}
}
}*/

View File

@ -1,236 +0,0 @@
// our package
package org.broadinstitute.sting.gatk.refdata;
// the imports for unit testing.
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import static org.junit.Assert.assertTrue;
/**
* Basic unit test for TabularROD
*
*/
public class TabularRODUnitTest extends BaseTest {
private static ReferenceSequenceFile seq;
private ReferenceOrderedData ROD;
private LocationAwareSeekableRODIterator iter;
@BeforeClass
public static void init() throws FileNotFoundException {
// sequence
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
GenomeLocParser.setupRefContigOrdering(seq);
}
@Before
public void setupTabularROD() {
TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX);
File file = new File(testDir + "TabularDataTest.dat");
ROD = new ReferenceOrderedData("tableTest", file, TabularROD.class);
iter = new SeekableRODIterator(new GATKFeatureIterator(ROD.iterator()));
}
@Test
public void test1() {
logger.warn("Executing test1");
RODRecordList oneList = iter.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one.get("COL1").equals("A"));
assertTrue(one.get("COL2").equals("B"));
assertTrue(one.get("COL3").equals("C"));
}
@Test
public void test2() {
logger.warn("Executing test2");
RODRecordList oneList = iter.next();
RODRecordList twoList = iter.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
TabularROD two = (TabularROD)twoList.get(0).getUnderlyingObject();
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
assertTrue(two.get("COL2").equals("D"));
assertTrue(two.get("COL3").equals("E"));
}
@Test
public void test3() {
logger.warn("Executing test3");
RODRecordList oneList = iter.next();
RODRecordList twoList = iter.next();
RODRecordList threeList = iter.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
TabularROD two = (TabularROD)twoList.get(0).getUnderlyingObject();
TabularROD three = (TabularROD)threeList.get(0).getUnderlyingObject();
assertTrue(three.size() == 4);
assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30)));
assertTrue(three.get("COL1").equals("F"));
assertTrue(three.get("COL2").equals("G"));
assertTrue(three.get("COL3").equals("H"));
}
@Test
public void testDone() {
logger.warn("Executing testDone");
RODRecordList oneList = iter.next();
RODRecordList twoList = iter.next();
RODRecordList threeList = iter.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
TabularROD two = (TabularROD)twoList.get(0).getUnderlyingObject();
TabularROD three = (TabularROD)threeList.get(0).getUnderlyingObject();
assertTrue(!iter.hasNext());
}
@Test
public void testSeek() {
logger.warn("Executing testSeek");
RODRecordList twoList = iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20));
TabularROD two = (TabularROD)twoList.get(0).getUnderlyingObject();
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
assertTrue(two.get("COL2").equals("D"));
assertTrue(two.get("COL3").equals("E"));
}
@Test
public void testToString() {
logger.warn("Executing testToString");
RODRecordList oneList = iter.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
assertTrue(one.toString().equals("chrM:10\tA\tB\tC"));
}
// Didn't change the delimiter
@Test (expected = RuntimeException.class)
public void testDelim1() {
File file2 = new File(testDir + "TabularDataTest2.dat");
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class);
LocationAwareSeekableRODIterator iter_commas = new SeekableRODIterator(new GATKFeatureIterator(ROD_commas.iterator()));
logger.warn("Executing testDelim1");
RODRecordList one2List = iter_commas.next();
TabularROD one2 = (TabularROD)one2List.get(0).getUnderlyingObject();
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
assertTrue(one2.get("COL2").equals("B"));
assertTrue(one2.get("COL3").equals("C"));
assertTrue(one2.get("COL4").equals("1"));
}
@Test
public void testDelim2() {
TabularROD.setDelimiter(",",",");
File file2 = new File(testDir + "TabularDataTest2.dat");
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", file2, TabularROD.class);
LocationAwareSeekableRODIterator iter_commas = new SeekableRODIterator(new GATKFeatureIterator(ROD_commas.iterator()));
logger.warn("Executing testDelim1");
RODRecordList one2List = iter_commas.next();
TabularROD one2 = (TabularROD)one2List.get(0).getUnderlyingObject();
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
assertTrue(one2.get("COL2").equals("B"));
assertTrue(one2.get("COL3").equals("C"));
assertTrue(one2.get("COL4").equals("1"));
}
@Test
public void testCreation() {
logger.warn("Executing testCreation");
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
String rowData = String.format("%d %d %d", 1, 2, 3);
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
System.out.println(">>>>> " + row.toString());
assertTrue(row.toString().equals("chrM:1\t1\t2\t3"));
}
@Test
public void testCreationAndWriting() throws FileNotFoundException {
logger.warn("Executing testCreationAndWriting");
File outputFile = new File(testDir + "testTabularRodOutputTemp.dat");
PrintStream out = new PrintStream(new FileOutputStream(outputFile));
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
out.println(TabularROD.commentString("Hello, created from test"));
out.println(TabularROD.commentString(""));
out.println(TabularROD.headerString(header));
String rowData = String.format("%d %d %d", 1, 2, 3);
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
out.println(row.toString());
rowData = String.format("%d %d %d", 3, 4, 5);
row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 2), rowData.split(" "));
out.println(row.toString());
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class);
LocationAwareSeekableRODIterator iter_commas = new SeekableRODIterator(new GATKFeatureIterator(ROD_commas.iterator()));
RODRecordList oneList = iter_commas.next();
TabularROD one = (TabularROD)oneList.get(0).getUnderlyingObject();
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1)));
assertTrue(one.get("col1").equals("1"));
assertTrue(one.get("col2").equals("2"));
assertTrue(one.get("col3").equals("3"));
RODRecordList twoList = iter_commas.next();
TabularROD two = (TabularROD)twoList.get(0).getUnderlyingObject();
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2)));
assertTrue(two.get("col1").equals("3"));
assertTrue(two.get("col2").equals("4"));
assertTrue(two.get("col3").equals("5"));
}
/* @Test (expected=RuntimeException.class )
public void testBadHeader1() {
logger.warn("Executing testBadHeader1");
ArrayList<String> header = new ArrayList<String>();
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
}*/
/* @Test (expected=RuntimeException.class )
public void testBadHeader2() {
logger.warn("Executing testBadHeader2");
ArrayList<String> header = new ArrayList<String>(Arrays.asList("col1", "col2", "col3"));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
}*/
@Test (expected=RuntimeException.class )
public void testBadData1() {
logger.warn("Executing testBadData1");
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
String rowData = String.format("%d %d %d %d", 1, 2, 3, 4);
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
}
}
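Since this test is deleted outright rather than ported, a hedged sketch of what test1() looks like in the new world, using only constructs from the surviving ReferenceOrderedViewUnitTest above; whether SeekableRODIterator accepts the track's GATKFeature iterator directly is an assumption:

RMDTrackBuilder builder = new RMDTrackBuilder();
RMDTrack track = builder.createInstanceOfTrack(TableCodec.class, "tableTest",
        new File(testDir + "TabularDataTest.dat"));
// assumption: RMDTrack.getIterator() can feed SeekableRODIterator directly
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getIterator());
RODRecordList oneList = iter.next();
TableFeature one = (TableFeature) oneList.get(0).getUnderlyingObject();
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one.get("COL1").equals("A"));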

View File

@ -1,137 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* class RMDTrackManagerUnitTest
* Tests the ability of the RMDTrackManager to correctly create RMDTracks based on the requested types.
*/
public class RMDTrackManagerUnitTest extends BaseTest {
List<String> triplets;
List<RMDTrack> tracks;
@Before
public void setup() {
RMDTrackManager manager = new RMDTrackManager();
triplets = new ArrayList<String>();
// add our dbSNP data as a single name,type,file binding triplet
triplets.add("MyDbSNP,DBSNP,testdata/small.dbsnp.rod");
// TODO: Aaron remove following comment, reinstate line
//tracks = manager.getReferenceMetaDataSources(triplets);
}
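/* A minimal sketch of what setup is expected to do once the TODO above is
 * resolved; the two-argument form mirrors its use in testSpeedOfRealQuery
 * below, and the null engine argument is an assumption:
 *
 *   tracks = manager.getReferenceMetaDataSources(null, triplets);
 */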
@Test // TODO: Aaron remove me
public void voidTest() {
}
//@Test -- TODO: Aaron fix with next round of Tribble integration
public void testBuilderQuery() {
for (RMDTrack t : tracks) {
System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile());
int count = 0;
Iterator<GATKFeature> fIter;
try {
fIter = ((TribbleTrack) t).query("1", 1, 5000);
} catch (IOException e) {
throw new ReviewedStingException("blah I/O exception");
}
while (fIter.hasNext()) {
fIter.next();
count++;
}
Assert.assertEquals(100, count);
}
}
//@Test
public void testBuilderIterator() {
for (RMDTrack t : tracks) {
System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile());
int count = 0;
Iterator<GATKFeature> fIter = t.getIterator();
while (fIter.hasNext()) {
fIter.next();
count++;
}
Assert.assertEquals(100, count);
}
}
// @Test used only to determine how fast queries are, don't uncomment! (unless you know what you're doing).
public void testSpeedOfRealQuery() {
IndexedFastaSequenceFile file = new IndexedFastaSequenceFile(new File(b36KGReference));
final int intervalSize = 10000000;
GenomeLocParser.setupRefContigOrdering(file.getSequenceDictionary());
RMDTrackManager manager = new RMDTrackManager();
// add our db snp data
triplets.clear();
triplets.add("db");
triplets.add("DBSNP");
triplets.add("../../GATK_Data/dbsnp_130_b36.rod");
// make sure we have a single track
Assert.assertEquals(1, manager.getReferenceMetaDataSources(null, triplets).size());
RMDTrack t = manager.getReferenceMetaDataSources(null, triplets).get(0);
// test contigs 1 through 22 of the human reference
for (int loop = 1; loop <= 22; loop++) {
SAMSequenceRecord seqRec = GenomeLocParser.getContigInfo(String.valueOf(loop));
String name = seqRec.getSequenceName();
Iterator<GATKFeature> fIter;
for (int x = 1; x < seqRec.getSequenceLength() - intervalSize; x += intervalSize) {
long firstTime = System.currentTimeMillis();
long count = 0;
try {
fIter = ((TribbleTrack) t).query(name, x, x + intervalSize);
} catch (IOException e) {
throw new ReviewedStingException("blah I/O exception");
}
while (fIter.hasNext()) {
fIter.next();
count++;
}
System.err.println(name + "," + count + "," + (System.currentTimeMillis() - firstTime));
}
}
}
}

View File

@ -1,274 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.collections.Pair;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.*;
import java.util.*;
/**
* performance tests for different index types
*/
public class IndexPerformanceTests extends BaseTest {
// the RMD track builder
private TribbleRMDTrackBuilder builder;
// our class logger (the level is set in setupFilesAndIndexes)
Logger logger = Logger.getLogger(IndexPerformanceTests.class);
// the input files to test
Map<String, File> inputFiles = new LinkedHashMap<String,File>();
// the input types
Map<String, Class> inputTypes = new HashMap<String,Class>();
// where the vcf files are located
String fileLocation = validationDataLocation + "Index_Performance_Data/";
// bin sizes to try
int[] binSizes = {10, 100, 1000, 5000, 10000, 50000};
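// result writers: 'writer' collects linear-index runs, 'writer2' tree-index runs (see printTestLine)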
PrintWriter writer;
PrintWriter writer2;
/** setup the files we're going to run with, including their names */
@Before
public void setupFilesAndIndexes() {
logger.setLevel(Level.INFO);
builder = new TribbleRMDTrackBuilder();
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(hg18Reference));
GenomeLocParser.setupRefContigOrdering(seq);
// the input files
/*inputFiles.put("\"10\"",new File(fileLocation + "tip10.vcf"));
inputFiles.put("\"100\"",new File(fileLocation + "tip100.vcf"));
inputFiles.put("\"1,000\"",new File(fileLocation + "tip1000.vcf"));
inputFiles.put("\"10,000\"",new File(fileLocation + "tip10000.vcf"));
inputFiles.put("\"100,000\"",new File(fileLocation + "tip100000.vcf"));
inputFiles.put("\"1,000,000\"",new File(fileLocation + "tip1000000.vcf"));*/
for (String name : inputFiles.keySet()) {
inputTypes.put(name,VCFCodec.class);
}
inputFiles.put("Big Table",new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/slowAnnotator/big.table.txt"));
inputTypes.put("Big Table", AnnotatorInputTableCodec.class);
}
@Test
public void emptyTest() {
// do nothing
}
//@Test
public void performanceTest() {
try {
writer = new PrintWriter(new FileWriter("testOutput_linear.txt"));
writer2 = new PrintWriter(new FileWriter("testOutput_tree.txt"));
} catch (IOException e) {
Assert.fail("Unable to open file testOutput.txt");
}
writer.println("name,index,binSize,createTime,seekTime,thousandPerThousand,record_count,index_size");
writer2.println("name,index,binSize,createTime,seekTime,thousandPerThousand,record_count,index_size");
for (String name : inputFiles.keySet()) {
for (int size : binSizes) {
System.err.println("running " + name + " with bin size " + size);
printTestLine(name, true, size);
printTestLine(name, false, size);
}
}
writer.close();
writer2.close();
}
private void printTestLine(String name, boolean useLinear, int size) {
PrintWriter wr = (useLinear) ? writer : writer2;
List<Long> values = performIndexTest(name,useLinear, size);
wr.print(name + "," + ((useLinear) ? "linear" : "tree") + "," + size);
for (Long l : values) {
wr.print(",");
wr.print(l);
}
wr.println();
}
/**
* Time various indexing tasks for the named input file.
* @param name the key of the input file to test
* @param useLinear whether this run is recorded in the linear-index or tree-index output table
*        (the flag is not currently passed through to the index builder)
* @param size the index bin size being tested (recorded in the output, not passed to the builder)
* @return a five-element list: the time to create the index, the time to issue a query into chr1,
* the time to query every 1kb window of the first 1M bases of chr1, the count of records seen
* in the last operation, and the index file size
*/
public List<Long> performIndexTest(String name, boolean useLinear, int size) {
deleteIndex(inputFiles.get(name));
// time creating the index
long createTime = System.currentTimeMillis();
Pair<BasicFeatureSource, SAMSequenceDictionary> pairing = builder.createFeatureReader(inputTypes.get(name),inputFiles.get(name));
createTime = System.currentTimeMillis() - createTime;
//System.err.println("index creation took " + createTime);
// seek to chr1
long seekTo1 = seekToChr1(pairing);
// query every 1kb window of the first 1M bases of chr1
long count = 0;
long thousandEveryThousand = System.currentTimeMillis();
try {
for (int x = 1; x < 1000000; x = x + 1000) {
//CloseableTribbleIterator<Feature> iter = pairing.first.query("chr1", x+(int)Math.floor(Math.random()*1000), x+1000); // query
CloseableTribbleIterator<Feature> iter = pairing.first.query("chr1", x, x+1000); // query
for (Feature feat : iter) {
count++;
}
}
} catch (IOException e) {
Assert.fail("Unable to load file for query!!");
}
thousandEveryThousand = System.currentTimeMillis() - thousandEveryThousand;
//System.err.println("thousand every thousand (for first million) took " + thousandEveryThousand);
return Arrays.asList(createTime, seekTo1, thousandEveryThousand, count, new File(inputFiles.get(name) + TribbleRMDTrackBuilder.indexExtension).length());
}
private long seekToChr1(Pair<BasicFeatureSource, SAMSequenceDictionary> pairing) {
// time issuing a query over the first 10M bases of chr1; the returned
// iterator is not consumed, so this measures query setup rather than iteration
long seekTo1 = System.currentTimeMillis();
try {
CloseableTribbleIterator iter = pairing.first.query("chr1",1,10000000); // query
} catch (IOException e) {
Assert.fail("Unable to load file for query!!");
}
seekTo1 = System.currentTimeMillis() - seekTo1;
//System.err.println("seeking to chr1 took " + seekTo1);
return seekTo1;
}
//@Test
public void testBigTable() {
// store the mapping of location -> variant; this will get big
Map<GenomeLoc, Integer> features = new TreeMap<GenomeLoc,Integer>();
Map<Integer,Integer> bucketToCount = getMapOfFeatures(features,false);
Pair<BasicFeatureSource, SAMSequenceDictionary> pairing;
Map<GenomeLoc, Integer> features2 = new TreeMap<GenomeLoc,Integer>();
Map<Integer,Integer> bucketToCount2 = getMapOfFeatures(features2,true);
System.err.println("Summary: ");
System.err.println("Summary: tree " + features.size());
System.err.println("Summary: linear " + features2.size());
// compare the two
for (Map.Entry<GenomeLoc,Integer> entry: features.entrySet()) {
if (!features2.containsKey(entry.getKey())) {
System.err.println("key " + entry + " missing from linear, count " + entry.getValue());
}
else if (!features2.get(entry.getKey()).equals(entry.getValue())) {
System.err.println("counts are not equal at " +
entry.getKey() +
" features2.get(entry.getKey()) = " +
features2.get(entry.getKey()) +
" feature1 = " + entry.getValue());
}
if (features2.containsKey(entry.getKey())) features2.remove(entry.getKey());
}
System.err.println("Missing from the tree :");
for (Map.Entry<GenomeLoc,Integer> entry2: features2.entrySet()) {
System.err.println("Position " + entry2.getKey() + " count = " + entry2.getValue());
}
for (Integer bucket : bucketToCount.keySet()) {
if (!bucketToCount2.get(bucket).equals(bucketToCount.get(bucket))) {
System.err.println("Bucket " + bucket + " tree != linear, " + bucketToCount2.get(bucket) + " " + bucketToCount.get(bucket));
}
}
}
private Map<Integer,Integer> getMapOfFeatures(Map<GenomeLoc, Integer> features, boolean useLinear) {
deleteIndex(inputFiles.get("Big Table"));
// time creating the index
logger.warn("creating index, linear = " + useLinear);
Map<Integer,Integer> bucketToCount = new TreeMap<Integer,Integer>();
Pair<BasicFeatureSource, SAMSequenceDictionary> pairing = builder.createFeatureReader(inputTypes.get("Big Table"),inputFiles.get("Big Table"));
logger.warn("created index, traversing");
try {
for (int x = 5000; x < 6000; x = x + 1000) {
CloseableTribbleIterator<Feature> iter = pairing.first.query("chr1", x, x + 1000); // query
for (Feature feat : iter) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(feat.getChr(), feat.getStart(), feat.getEnd());
if (loc.getStop() < 5000 || loc.getStart() > 6000) continue;
// count occurrences of each location (first occurrence = 1)
int count = features.containsKey(loc) ? features.get(loc) + 1 : 1;
features.put(loc, count);
// tally the number of features seen in this bucket
if (bucketToCount.containsKey(x)) bucketToCount.put(x, bucketToCount.get(x) + 1);
else bucketToCount.put(x, 1);
}
}
logger.warn("Done, returning");
} catch (IOException e) {
Assert.fail("Unable to load file for query!!");
}
return bucketToCount;
}
//@Test
public void testGetTreeIndexLocation() {
File bigTable = new File("small.table.txt");
deleteIndex(bigTable);
// time creating the index
logger.warn("creating index");
Map<Integer,Integer> bucketToCount = new TreeMap<Integer,Integer>();
Pair<BasicFeatureSource, SAMSequenceDictionary> pairing = builder.createFeatureReader(inputTypes.get("Big Table"),bigTable);
logger.warn("created index, traversing");
try {
int count = 0;
for (int x = 5000; x < 6000; x = x + 1000) {
CloseableTribbleIterator<Feature> iter = pairing.first.query("chr1", x, x + 1000); // query
for (Feature feat : iter) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(feat.getChr(), feat.getStart(), feat.getEnd());
if (loc.getStop() < 5000 || loc.getStart() > 6000) continue;
count++;
System.err.println(feat.toString());
}
}
System.err.println(count);
} catch (IOException e) {
Assert.fail("Unable to load file for query!!");
}
}
private void deleteIndex(File fl) {
File indexFile = new File(fl + TribbleRMDTrackBuilder.indexExtension);
boolean deleted = true;
if (indexFile.exists())
deleted = indexFile.delete();
if (!deleted)
Assert.fail("Unable to delete index file");
}
}

View File

@ -42,16 +42,16 @@ import java.util.Map;
/**
* @author aaron
* <p/>
* Class TribbleRMDTrackBuilderUnitTest
* Class RMDTrackBuilderUnitTest
* <p/>
* Testing out the builder for tribble Tracks
*/
public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
private TribbleRMDTrackBuilder builder;
public class RMDTrackBuilderUnitTest extends BaseTest {
private RMDTrackBuilder builder;
@Before
public void setup() {
builder = new TribbleRMDTrackBuilder();
builder = new RMDTrackBuilder();
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(b36KGReference));
GenomeLocParser.setupRefContigOrdering(seq);
}
@ -73,8 +73,8 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
Assert.fail("IO exception unexpected" + e.getMessage());
}
// make sure we didn't write the file (check that it's timestamp is within bounds)
//System.err.println(new File(vcfFile + TribbleRMDTrackBuilder.indexExtension).lastModified());
Assert.assertTrue(Math.abs(1279591752000l - new File(vcfFile + TribbleRMDTrackBuilder.indexExtension).lastModified()) < 100);
//System.err.println(new File(vcfFile + RMDTrackBuilder.indexExtension).lastModified());
Assert.assertTrue(Math.abs(1279591752000l - new File(vcfFile + RMDTrackBuilder.indexExtension).lastModified()) < 100);
}

View File

@ -19,7 +19,7 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
+ "-R " + hg18Reference
+ " -L chr15:46,347,148 -o %s";
String expected_md5 = "052187dd2bf2516a027578c8775856a8";
String expected_md5 = "872e89df166b90e06dd2737535c5d8b3";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5));
executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
}

View File

@ -83,7 +83,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesUseOriginalQuals() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "72b79646061d78674a3752272823d47f");
e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "db6c0dca1ec121f8c2e6d20f4969dee5");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -96,7 +96,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -L 1:1-1,000" +
" -standard" +
" -OQ" +
" -recalFile %s",
" -recalFile %s" +
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod",
1, // just one output file
Arrays.asList(md5));
executeTest("testCountCovariatesUseOriginalQuals", spec);

View File

@ -6,7 +6,12 @@ import org.junit.Test;
import java.util.Arrays;
public class SequenomValidationConverterIntegrationTest extends WalkerTest {
@Test
public void testEmpty() {
System.err.println("Reinstate these tests when plink is back in");
}
//@Test TODO: reinstate the test when the Plink rod is back
public void testSNPs() {
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
String testArgs = "-R "+b36KGReference + " -T SequenomValidationConverter -B:sequenom,Plink "+testPedFile+" -o %s";

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.playground.utils.report.templates;
import org.broadinstitute.sting.playground.utils.report.templates.TextTable;
import org.junit.Assert;
import org.junit.Test;

View File

@ -6,7 +6,7 @@ import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -76,7 +76,7 @@ public class VCFWriterUnitTest extends BaseTest {
counter++;
}
Assert.assertEquals(2,counter);
new File(fakeVCFFile + TribbleRMDTrackBuilder.indexExtension).delete();
new File(fakeVCFFile + RMDTrackBuilder.indexExtension).delete();
fakeVCFFile.delete();
}
catch (IOException e ) {