Deleting lots of code as part of my cleanup. More classes tagged for removal. Many more walkers have their days numbered.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1885 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d749a5eb5f
commit
449a6ba75a
|
|
@ -131,11 +131,6 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
@Argument(fullName = "numthreads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||||
public int numberOfThreads = 1;
|
public int numberOfThreads = 1;
|
||||||
|
|
||||||
@Element(required = false)
|
|
||||||
@Argument(fullName = "LocusIteratorByHanger", shortName = "LIBH", doc = "Should we use the new LocusIteratorByState or the old LocusIteratorByHanger?", required = false)
|
|
||||||
public Boolean useLocusIteratorByHanger = false;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* marshal the data out to a object
|
* marshal the data out to a object
|
||||||
*
|
*
|
||||||
|
|
@ -271,9 +266,6 @@ public class GATKArgumentCollection {
|
||||||
if (other.numberOfThreads != this.numberOfThreads) {
|
if (other.numberOfThreads != this.numberOfThreads) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (other.useLocusIteratorByHanger != this.useLocusIteratorByHanger ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||||
|
|
||||||
|
|
@ -61,9 +60,6 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
Iterator<SAMRecord> reads = new FilteringIterator(provider.getReadIterator(), new LocusStreamFilterFunc());
|
||||||
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
this.sourceInfo = provider.getReadIterator().getSourceInfo();
|
||||||
|
|
||||||
if ( GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getArguments().useLocusIteratorByHanger)
|
|
||||||
this.loci = new LocusIteratorByHanger(reads, sourceInfo);
|
|
||||||
else
|
|
||||||
this.loci = new LocusIteratorByState(reads, sourceInfo);
|
this.loci = new LocusIteratorByState(reads, sourceInfo);
|
||||||
seedNextLocus();
|
seedNextLocus();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,226 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
|
||||||
|
|
||||||
import net.sf.samtools.AlignmentBlock;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.RefHanger;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
|
||||||
*/
|
|
||||||
public class LocusIteratorByHanger extends LocusIterator {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* our log, which we want to capture anything from this class
|
|
||||||
*/
|
|
||||||
private static Logger logger = Logger.getLogger(LocusIteratorByHanger.class);
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// member fields
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
private final PushbackIterator<SAMRecord> it;
|
|
||||||
|
|
||||||
private RefHanger<SAMRecord> readHanger = new RefHanger<SAMRecord>();
|
|
||||||
private RefHanger<Integer> offsetHanger = new RefHanger<Integer>();
|
|
||||||
final int INCREMENT_SIZE = 100;
|
|
||||||
final boolean DEBUG = false;
|
|
||||||
boolean justCleared = false;
|
|
||||||
private Reads readInfo;
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// constructors and other basic operations
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
public LocusIteratorByHanger(final Iterator<SAMRecord> samIterator, Reads readInformation) {
|
|
||||||
this.it = new PushbackIterator<SAMRecord>(samIterator);
|
|
||||||
this.readInfo = readInformation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<AlignmentContext> iterator() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
//this.it.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return readHanger.hasHangers() || it.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void printState() {
|
|
||||||
for (int i = 0; i < readHanger.size(); i++) {
|
|
||||||
RefHanger.Hanger rhanger = readHanger.getHanger(i);
|
|
||||||
RefHanger.Hanger ohanger = offsetHanger.getHanger(i);
|
|
||||||
|
|
||||||
logger.debug(String.format("printState(): location %s", rhanger.loc));
|
|
||||||
for (int j = 0; j < rhanger.size(); j++) {
|
|
||||||
SAMRecord read = (SAMRecord) rhanger.get(j);
|
|
||||||
int offset = (Integer) ohanger.get(j);
|
|
||||||
logger.debug(String.format(" read: %s(%d)=%s", read.getReadName(), offset, read.getReadString().charAt(offset)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
logger.debug(String.format(("clear() called")));
|
|
||||||
readHanger.clear();
|
|
||||||
offsetHanger.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// next() routine and associated collection operations
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
public AlignmentContext next() {
|
|
||||||
if (!currentPositionIsFullyCovered())
|
|
||||||
expandWindow(INCREMENT_SIZE, readInfo.getMaxReadsAtLocus());
|
|
||||||
|
|
||||||
if (DEBUG) {
|
|
||||||
logger.debug("in Next:");
|
|
||||||
printState();
|
|
||||||
}
|
|
||||||
|
|
||||||
RefHanger.Hanger rhanger = readHanger.popLeft();
|
|
||||||
RefHanger.Hanger ohanger = offsetHanger.popLeft();
|
|
||||||
|
|
||||||
if (rhanger.size() == 0) {
|
|
||||||
// we are in the case where there are no reads (we're inside a large indel without any reads
|
|
||||||
// so recursively call next. This is safe because having a position without any reads
|
|
||||||
// implies that there are positions to the right with reads
|
|
||||||
//System.out.printf("***** skipping reads%n");
|
|
||||||
return next();
|
|
||||||
} else {
|
|
||||||
return new AlignmentContext(rhanger.loc, rhanger.data, ohanger.data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected boolean hangRead(final SAMRecord read, final int maximumPileupSize, boolean warned) {
|
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
|
||||||
|
|
||||||
for (AlignmentBlock block : read.getAlignmentBlocks()) {
|
|
||||||
if (DEBUG) logger.debug(String.format("Processing block %s len=%d", block, block.getLength()));
|
|
||||||
for (int i = 0; i < block.getLength(); i++) {
|
|
||||||
// check to see if we've exceeded the maximum number of reads in the pile-up
|
|
||||||
GenomeLoc offset = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
|
|
||||||
int hangerSize = (readHanger.hasLocation(offset)) ? readHanger.getHanger(offset).size() : -1;
|
|
||||||
if (hangerSize >= maximumPileupSize) {
|
|
||||||
if (!warned) {
|
|
||||||
warned = true;
|
|
||||||
Utils.warnUser("Unable to add a read, we're over the hanger limit of " + maximumPileupSize + " at location " + readHanger.getLeftLoc());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
readHanger.expandingPut(offset, read);
|
|
||||||
offsetHanger.expandingPut(offset, block.getReadStart() + i - 1);
|
|
||||||
}
|
|
||||||
if (DEBUG) logger.debug(String.format(" # Added %s", offset));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return warned; // did we warn the user about this location?
|
|
||||||
}
|
|
||||||
|
|
||||||
private final boolean currentPositionIsFullyCovered(final GenomeLoc nextReadInStreamLoc) {
|
|
||||||
if (readHanger.isEmpty())
|
|
||||||
// If the buffer is empty, we're definitely not done
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (nextReadInStreamLoc.compareTo(readHanger.getLeftLoc()) == 1)
|
|
||||||
// the next read in the stream is beyond the left most position, so it's fully covered
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
// not fully covered
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final boolean currentPositionIsFullyCovered() {
|
|
||||||
if (!it.hasNext()) // if there are no more reads, we are fully covered
|
|
||||||
return true;
|
|
||||||
else {
|
|
||||||
final SAMRecord read = it.peek();
|
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
|
||||||
final boolean coveredP = currentPositionIsFullyCovered(readLoc);
|
|
||||||
//System.out.printf("CoverP = %s => %b%n", readLoc, coveredP);
|
|
||||||
return coveredP;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final void expandWindow(final int incrementSize, final int maximumPileupSize) {
|
|
||||||
// todo: reenable
|
|
||||||
if (false && incrementSize != 1) {
|
|
||||||
Utils.scareUser(String.format("BUG: IncrementSize=%d != 1, the codebase doesn't support this extension strategy yet", incrementSize));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DEBUG) {
|
|
||||||
logger.debug(String.format("entering expandWindow..., hasNext=%b", it.hasNext()));
|
|
||||||
printState();
|
|
||||||
}
|
|
||||||
Boolean warned = false; // warn them once per locus
|
|
||||||
while (it.hasNext()) {
|
|
||||||
if (DEBUG) {
|
|
||||||
logger.debug(String.format("Expanding window"));
|
|
||||||
printState();
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMRecord read = it.next();
|
|
||||||
justCleared = false;
|
|
||||||
|
|
||||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
|
||||||
if (DEBUG) {
|
|
||||||
logger.debug(String.format(" Expanding window sizes %d with %d : left=%s, right=%s, readLoc = %s, cmp=%d",
|
|
||||||
readHanger.size(), incrementSize,
|
|
||||||
readHanger.hasHangers() ? readHanger.getLeftLoc() : "NA",
|
|
||||||
readHanger.hasHangers() ? readHanger.getRightLoc() : "NA",
|
|
||||||
readLoc,
|
|
||||||
readHanger.hasHangers() ? readLoc.compareTo(readHanger.getLeftLoc()) : -100));
|
|
||||||
}
|
|
||||||
//if ( readHanger.size() >= incrementSize ) {
|
|
||||||
//if ( readHanger.hasHangers() && readLoc.compareTo(readHanger.getLeftLoc()) == 1) {
|
|
||||||
if (readHanger.hasHangers() && readLoc.distance(readHanger.getLeftLoc()) >= incrementSize) {
|
|
||||||
// We've collected up enough reads
|
|
||||||
it.pushback(read);
|
|
||||||
break;
|
|
||||||
} else
|
|
||||||
warned = hangRead(read,maximumPileupSize,warned);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.RefHanger;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import net.sf.picard.reference.ReferenceSequence;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public class PileupWithContextWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
public class PileupWithContextWalker extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||||
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
||||||
public boolean alwaysShowSecondBase = false;
|
public boolean alwaysShowSecondBase = false;
|
||||||
|
|
|
||||||
|
|
@ -1,327 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
@WalkerName("LogisticRecalibration")
|
|
||||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
|
||||||
public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|
||||||
@Argument(shortName="logisticParams", doc="logistic params file", required=true)
|
|
||||||
public String logisticParamsFile;
|
|
||||||
|
|
||||||
@Argument(fullName="outputBamFile",shortName="outputBAM", doc="output BAM file", required=false)
|
|
||||||
public SAMFileWriter outputBamFile = null;
|
|
||||||
|
|
||||||
@Argument(shortName="useCache", doc="If true, uses high-performance caching of logistic regress results. Experimental", required=false)
|
|
||||||
public boolean useLogisticCache = true;
|
|
||||||
|
|
||||||
Map<Pair<String,String>, LogisticRegressor> regressors = new HashMap<Pair<String,String>, LogisticRegressor>();
|
|
||||||
private static Logger logger = Logger.getLogger(LogisticRecalibrationWalker.class);
|
|
||||||
|
|
||||||
// maps from [readGroup] -> [prevBase x base -> [cycle, qual, new qual]]
|
|
||||||
HashMap<String, HashMap<String, byte[][]>> cache = new HashMap<String, HashMap<String, byte[][]>>();
|
|
||||||
|
|
||||||
private static byte MAX_Q_SCORE = 64;
|
|
||||||
|
|
||||||
|
|
||||||
@Argument(shortName="maxReadLen", doc="Maximum allowed read length to allow during recalibration, needed for recalibration table allocation", required=false)
|
|
||||||
public static int maxReadLen = 125;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
try {
|
|
||||||
List<String> lines = new xReadLines(new File(logisticParamsFile)).readLines();
|
|
||||||
ArrayList<Pair<Integer, Integer>> mapping = parseHeader(lines.get(0));
|
|
||||||
for ( String line : lines.subList(1,lines.size()) ) {
|
|
||||||
// dinuc coeff1 coeff2 ... coeff25
|
|
||||||
String[] vals = line.split("\\s+");
|
|
||||||
String readGroup = vals[0];
|
|
||||||
String dinuc = vals[1];
|
|
||||||
LogisticRegressor regressor = new LogisticRegressor(2, 4);
|
|
||||||
regressors.put(new Pair<String,String>(readGroup,dinuc), regressor);
|
|
||||||
System.out.printf("Vals = %s%n", Utils.join(",", vals));
|
|
||||||
for ( int i = 2; i < vals.length; i++ ) {
|
|
||||||
Pair<Integer, Integer> ij = mapping.get(i-2);
|
|
||||||
try {
|
|
||||||
double c = Double.parseDouble(vals[i]);
|
|
||||||
regressor.setCoefficient(ij.first, ij.second, c);
|
|
||||||
System.out.printf("Setting coefficient %s,%s => %s = %1.12f%n", readGroup, dinuc, ij, c);
|
|
||||||
} catch ( NumberFormatException e ) {
|
|
||||||
Utils.scareUser("Badly formed logistic regression header at " + vals[i] + " line: " + line );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( useLogisticCache ) System.out.printf("Building recalibration cache%n");
|
|
||||||
|
|
||||||
for ( Map.Entry<Pair<String,String>, LogisticRegressor> e : regressors.entrySet() ) {
|
|
||||||
String readGroup = e.getKey().first;
|
|
||||||
String dinuc = e.getKey().second;
|
|
||||||
LogisticRegressor regressor = e.getValue();
|
|
||||||
logger.debug(String.format("Regressor: %s,%s => %s", readGroup, dinuc, regressor));
|
|
||||||
|
|
||||||
if ( useLogisticCache ) {
|
|
||||||
addToLogisticCache(readGroup, dinuc, regressor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( useLogisticCache ) System.out.printf("done%n");
|
|
||||||
|
|
||||||
//System.exit(1);
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
Utils.scareUser("Cannot read/find logistic parameters file " + logisticParamsFile);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// damn I hate parsing lines
|
|
||||||
private ArrayList<Pair<Integer, Integer>> parseHeader(String headerLine) {
|
|
||||||
ArrayList<Pair<Integer, Integer>> mapping = new ArrayList<Pair<Integer, Integer>>();
|
|
||||||
|
|
||||||
String[] elts = headerLine.split("\\s+");
|
|
||||||
if ( ! "rg".equalsIgnoreCase(elts[0]) )
|
|
||||||
Utils.scareUser("Badly formatted Logistic regression header, upper left keyword must be rg: " + elts[0] + " line: " + headerLine);
|
|
||||||
if ( ! elts[1].toLowerCase().startsWith("dinuc") ) // checking only start of first field because dinuc will be followed by a version number to be checekde later
|
|
||||||
Utils.scareUser("Badly formatted Logistic regression header, second left keyword must be dinuc: " + elts[1] + " line: " + headerLine);
|
|
||||||
|
|
||||||
for ( int k = 2; k < elts.length; k++ ) {
|
|
||||||
String paramStr = elts[k];
|
|
||||||
String[] ij = paramStr.split(",");
|
|
||||||
if ( ij.length != 2 ) {
|
|
||||||
Utils.scareUser("Badly formed logistic regression header at " + paramStr + " line: " + headerLine);
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
int i = Integer.parseInt(ij[0]);
|
|
||||||
int j = Integer.parseInt(ij[1]);
|
|
||||||
mapping.add(new Pair<Integer, Integer>(i,j));
|
|
||||||
logger.info(String.format("%d => %d/%d", k, i, j));
|
|
||||||
} catch ( NumberFormatException e ) {
|
|
||||||
Utils.scareUser("Badly formed logistic regression header at " + paramStr + " line: " + headerLine );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return mapping;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addToLogisticCache(final String readGroup, final String dinuc, LogisticRegressor regressor) {
|
|
||||||
System.out.printf("%s x %s ", readGroup, dinuc);
|
|
||||||
byte[][] dataTable = new byte[maxReadLen][MAX_Q_SCORE];
|
|
||||||
|
|
||||||
for ( int cycle = 1; cycle < maxReadLen; cycle++ ) {
|
|
||||||
for ( byte qual = 0; qual < MAX_Q_SCORE; qual++ ) {
|
|
||||||
dataTable[cycle][qual] = regressor2newQual(regressor, cycle, qual);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HashMap<String, byte[][]> lookup1 = cache.containsKey(readGroup) ? cache.get(readGroup) : new HashMap<String, byte[][]>();
|
|
||||||
lookup1.put(dinuc, dataTable);
|
|
||||||
cache.put(readGroup, lookup1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
|
||||||
if ( useLogisticCache )
|
|
||||||
return mapCached(ref, read);
|
|
||||||
else
|
|
||||||
return mapOriginal(ref, read);
|
|
||||||
}
|
|
||||||
|
|
||||||
private byte cache2newQual(final String readGroup, HashMap<String, byte[][]> RGcache, byte prevBase, byte base, LogisticRegressor regressor, int cycle, byte qual) {
|
|
||||||
//System.out.printf("Lookup %s %c %c %d %d%n", readGroup, prevBase, base, cycle, qual);
|
|
||||||
//String dinuc = String.format("%c%c", (char)prevBase, (char)base);
|
|
||||||
byte[] bp = {prevBase, base};
|
|
||||||
String dinuc = new String(bp);
|
|
||||||
|
|
||||||
//byte newQualCalc = regressor2newQual(regressor, cycle, qual);
|
|
||||||
byte[][] dataTable = RGcache.get(dinuc);
|
|
||||||
|
|
||||||
if ( dataTable == null && prevBase != 'N' && base != 'N' )
|
|
||||||
throw new RuntimeException(String.format("Unmapped data table at %s %s", readGroup, dinuc));
|
|
||||||
|
|
||||||
byte newQualCached = dataTable != null ? dataTable[cycle][qual] : qual;
|
|
||||||
//if ( newQualCached != newQualCalc ) {
|
|
||||||
// throw new RuntimeException(String.format("Inconsistent quals between the cache and calculation for RG=%s: %s %d %d : %d <> %d",
|
|
||||||
// readGroup, dinuc, cycle, qual, newQualCalc, newQualCached));
|
|
||||||
//}
|
|
||||||
|
|
||||||
return newQualCached;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord mapCached(char[] ref, SAMRecord read) {
|
|
||||||
if ( read.getReadLength() > maxReadLen ) {
|
|
||||||
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
|
|
||||||
}
|
|
||||||
|
|
||||||
SAMRecord recalRead = read;
|
|
||||||
byte[] bases = read.getReadBases();
|
|
||||||
byte[] quals = read.getBaseQualities();
|
|
||||||
byte[] recalQuals = new byte[quals.length];
|
|
||||||
|
|
||||||
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
|
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
|
||||||
bases = BaseUtils.simpleReverseComplement(bases);
|
|
||||||
quals = BaseUtils.reverse(quals);
|
|
||||||
}
|
|
||||||
|
|
||||||
String readGroup = read.getAttribute("RG").toString();
|
|
||||||
|
|
||||||
HashMap<String, byte[][]> RGcache = cache.get(readGroup);
|
|
||||||
|
|
||||||
int numBases = read.getReadLength();
|
|
||||||
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
|
||||||
|
|
||||||
for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
|
||||||
// Take into account that previous base is the next base in terms of machine chemistry if
|
|
||||||
// this is a negative strand
|
|
||||||
byte qual = quals[cycle];
|
|
||||||
//LogisticRegressor regressor = getLogisticRegressor(readGroup, bases[cycle - 1], bases[cycle]);
|
|
||||||
LogisticRegressor regressor = null;
|
|
||||||
byte newQual = cache2newQual(readGroup, RGcache, bases[cycle - 1], bases[cycle], regressor, cycle, qual);
|
|
||||||
recalQuals[cycle] = newQual;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (read.getReadNegativeStrandFlag())
|
|
||||||
recalQuals = BaseUtils.reverse(recalQuals);
|
|
||||||
//System.out.printf("OLD: %s%n", read.format());
|
|
||||||
read.setBaseQualities(recalQuals);
|
|
||||||
//System.out.printf("NEW: %s%n", read.format());
|
|
||||||
return recalRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Old-style, expensive recalibrator
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------------------------------------
|
|
||||||
private LogisticRegressor getLogisticRegressor(final String readGroup, byte prevBase, byte base) {
|
|
||||||
String dinuc = String.format("%c%c", (char)prevBase, (char)base);
|
|
||||||
return regressors.get(new Pair<String,String>(readGroup,dinuc));
|
|
||||||
}
|
|
||||||
|
|
||||||
private byte regressor2newQual(LogisticRegressor regressor, int cycle, byte qual) {
|
|
||||||
byte newQual = qual;
|
|
||||||
if ( regressor != null ) { // no N or some other unexpected bp in the stream
|
|
||||||
double gamma = regressor.regress((double)cycle+1, (double)qual);
|
|
||||||
double expGamma = Math.exp(gamma);
|
|
||||||
double finalP = expGamma / (1+expGamma);
|
|
||||||
newQual = QualityUtils.probToQual(1-finalP);
|
|
||||||
}
|
|
||||||
return newQual;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord mapOriginal(char[] ref, SAMRecord read) {
|
|
||||||
SAMRecord recalRead = read;
|
|
||||||
byte[] bases = read.getReadBases();
|
|
||||||
byte[] quals = read.getBaseQualities();
|
|
||||||
byte[] recalQuals = new byte[quals.length];
|
|
||||||
|
|
||||||
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
|
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
|
||||||
bases = BaseUtils.simpleReverseComplement(bases);
|
|
||||||
quals = BaseUtils.reverse(quals);
|
|
||||||
}
|
|
||||||
|
|
||||||
String readGroup = read.getAttribute("RG").toString();
|
|
||||||
int numBases = read.getReadLength();
|
|
||||||
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
|
||||||
|
|
||||||
for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
|
||||||
// Take into account that previous base is the next base in terms of machine chemistry if
|
|
||||||
// this is a negative strand
|
|
||||||
byte qual = quals[cycle];
|
|
||||||
LogisticRegressor regressor = getLogisticRegressor(readGroup, bases[cycle - 1], bases[cycle]);
|
|
||||||
byte newQual = regressor2newQual(regressor, cycle, qual);
|
|
||||||
recalQuals[cycle] = newQual;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (read.getReadNegativeStrandFlag())
|
|
||||||
recalQuals = BaseUtils.reverse(quals);
|
|
||||||
//System.out.printf("OLD: %s%n", read.format());
|
|
||||||
read.setBaseQualities(recalQuals);
|
|
||||||
//System.out.printf("NEW: %s%n", read.format());
|
|
||||||
return recalRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
// public SAMRecord mapOriginalUnmodified(char[] ref, SAMRecord read) {
|
|
||||||
// SAMRecord recalRead = read;
|
|
||||||
// byte[] bases = read.getReadBases();
|
|
||||||
// byte[] quals = read.getBaseQualities();
|
|
||||||
// byte[] recalQuals = new byte[quals.length];
|
|
||||||
//
|
|
||||||
// // Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
|
|
||||||
// if (read.getReadNegativeStrandFlag()) {
|
|
||||||
// bases = BaseUtils.simpleReverseComplement(bases);
|
|
||||||
// quals = BaseUtils.reverse(quals);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// String readGroup = read.getAttribute("RG").toString();
|
|
||||||
// int numBases = read.getReadLength();
|
|
||||||
// recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
|
||||||
// //recalQuals[numBases-1] = quals[numBases-1]; // can't change last -- no dinuc
|
|
||||||
// for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
|
||||||
// // Take into account that previous base is the next base in terms of machine chemistry if this is a negative strand
|
|
||||||
// //int cycle = i; //read.getReadNegativeStrandFlag() ? numBases - i - 1 : i;
|
|
||||||
// String dinuc = String.format("%c%c", bases[cycle - 1], bases[cycle]);
|
|
||||||
// byte qual = quals[cycle];
|
|
||||||
// LogisticRegressor regressor = regressors.get(new Pair<String,String>(readGroup,dinuc));
|
|
||||||
// byte newQual;
|
|
||||||
//
|
|
||||||
// if ( regressor != null ) { // no N or some other unexpected bp in the stream
|
|
||||||
// double gamma = regressor.regress((double)cycle+1, (double)qual);
|
|
||||||
// double expGamma = Math.exp(gamma);
|
|
||||||
// double finalP = expGamma / (1+expGamma);
|
|
||||||
// newQual = QualityUtils.probToQual(1-finalP);
|
|
||||||
// //newQual = -10 * Math.round(logPOver1minusP)
|
|
||||||
// /*double POver1minusP = Math.pow(10, logPOver1minusP);
|
|
||||||
// P = POver1minusP / (1 + POver1minusP);*/
|
|
||||||
// //newQual = QualityUtils.probToQual(P);
|
|
||||||
//
|
|
||||||
// //newQual = (byte)Math.min(Math.round(-10*logPOver1minusP),63);
|
|
||||||
// //System.out.printf("Recal %s %d %d %d%n", dinuc, cycle, qual, newQual);
|
|
||||||
// }else{
|
|
||||||
// newQual = qual;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// recalQuals[cycle] = newQual;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (read.getReadNegativeStrandFlag())
|
|
||||||
// recalQuals = BaseUtils.reverse(quals);
|
|
||||||
// //System.out.printf("OLD: %s%n", read.format());
|
|
||||||
// read.setBaseQualities(recalQuals);
|
|
||||||
// //System.out.printf("NEW: %s%n", read.format());
|
|
||||||
// return recalRead;
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
public void onTraversalDone(SAMFileWriter output) {
|
|
||||||
if ( output != null ) {
|
|
||||||
output.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMFileWriter reduceInit() {
|
|
||||||
return outputBamFile;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Summarize the error rate data.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
|
|
||||||
if ( output != null ) {
|
|
||||||
output.addAlignment(read);
|
|
||||||
} else {
|
|
||||||
out.println(read.format());
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.contexts;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Sep 9, 2009
|
|
||||||
* Time: 10:43:23 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public abstract class FilteredAlignmentContext extends AlignmentContext{
|
|
||||||
|
|
||||||
public FilteredAlignmentContext() { /* super method is called */ }
|
|
||||||
|
|
||||||
/* A partitioned alignment context must have a constructor
|
|
||||||
* method that generates the object from another alignment
|
|
||||||
* context
|
|
||||||
*/
|
|
||||||
|
|
||||||
public FilteredAlignmentContext(AlignmentContext context) {
|
|
||||||
Pair<List<SAMRecord>, List<Integer>> partitionedReads = filter(context);
|
|
||||||
this.reads = partitionedReads.getFirst();
|
|
||||||
this.offsets = partitionedReads.getSecond();
|
|
||||||
this.loc = context.getLocation();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* A new partitioned alignment object need only specify how the reads from an Alignmentcontext
|
|
||||||
* are to be partitioned, and return the new partition in a pair.
|
|
||||||
* @Param: context - an alignment context containing reads to be partitioned
|
|
||||||
*/
|
|
||||||
public abstract Pair<List<SAMRecord>, List<Integer>> filter(AlignmentContext context);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.contexts;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Sep 9, 2009
|
|
||||||
* Time: 11:01:53 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ForwardReadsContext extends FilteredAlignmentContext {
|
|
||||||
|
|
||||||
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
|
|
||||||
List<SAMRecord> inReads = context.getReads();
|
|
||||||
List<Integer> inOffsets = context.getOffsets();
|
|
||||||
List<SAMRecord> filteredReads = new ArrayList<SAMRecord>();
|
|
||||||
List<Integer> filteredOffsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
for( int i = 0; i < inReads.size(); i++ ) {
|
|
||||||
if( ! inReads.get(i).getReadNegativeStrandFlag() ) {
|
|
||||||
filteredReads.add(inReads.get(i));
|
|
||||||
filteredOffsets.add(inOffsets.get(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new Pair<List<SAMRecord>,List<Integer>>(filteredReads,filteredOffsets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.contexts;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Sep 9, 2009
|
|
||||||
* Time: 11:17:30 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class QualityScoreThresholdContext extends FilteredAlignmentContext{
|
|
||||||
/*
|
|
||||||
* @Param: qThresh - default value for thresholding
|
|
||||||
*/
|
|
||||||
protected byte qThresh = 22;
|
|
||||||
|
|
||||||
public QualityScoreThresholdContext(AlignmentContext context, byte qThresh) {
|
|
||||||
this.qThresh = qThresh;
|
|
||||||
Pair<List<SAMRecord>, List<Integer>> filteredRO = filter(context);
|
|
||||||
this.reads = filteredRO.getFirst();
|
|
||||||
this.offsets = filteredRO.getSecond();
|
|
||||||
this.loc = context.getLocation();
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte getQualityScoreThreshold() {
|
|
||||||
return this.qThresh;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
|
|
||||||
List<SAMRecord> inReads = context.getReads();
|
|
||||||
List<Integer> inOffsets = context.getOffsets();
|
|
||||||
List<SAMRecord> outReads = new ArrayList<SAMRecord>();
|
|
||||||
List<Integer> outOffsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
for( int i = 0; i < inReads.size(); i++) {
|
|
||||||
if(inReads.get(i).getBaseQualities()[inOffsets.get(i)] >= this.qThresh) {
|
|
||||||
outReads.add(inReads.get(i));
|
|
||||||
outOffsets.add(inOffsets.get(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new Pair<List<SAMRecord>,List<Integer>>(outReads,outOffsets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.contexts;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: chartl
|
|
||||||
* Date: Sep 9, 2009
|
|
||||||
* Time: 11:09:32 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ReverseReadsContext extends FilteredAlignmentContext {
|
|
||||||
|
|
||||||
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
|
|
||||||
List<SAMRecord> inReads = context.getReads();
|
|
||||||
List<Integer> inOffsets = context.getOffsets();
|
|
||||||
List<SAMRecord> filteredReads = new ArrayList<SAMRecord>();
|
|
||||||
List<Integer> filteredOffsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
for( int i = 0; i < inReads.size(); i++ ) {
|
|
||||||
if( inReads.get(i).getReadNegativeStrandFlag() ) {
|
|
||||||
filteredReads.add(inReads.get(i));
|
|
||||||
filteredOffsets.add(inOffsets.get(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new Pair<List<SAMRecord>,List<Integer>>(filteredReads,filteredOffsets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk;
|
|
||||||
|
|
||||||
import net.sf.picard.cmdline.CommandLineProgram;
|
|
||||||
import net.sf.picard.cmdline.Usage;
|
|
||||||
import net.sf.picard.cmdline.Option;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class PrepareROD extends CommandLineProgram {
|
|
||||||
// Usage and parameters
|
|
||||||
@Usage(programVersion="0.1") public String USAGE = "SAM Validator\n";
|
|
||||||
@Option(shortName="REF", doc="Reference sequence file") public File REF_FILE_ARG = null;
|
|
||||||
@Option(shortName="ROD", doc="Referenced Ordered Data file") public String ROD_FILE = null;
|
|
||||||
@Option(shortName="OUT", doc="Referenced Ordered Data file") public String OUTPUT_FILE = null;
|
|
||||||
@Option(shortName="RODNAME", doc="Name of the data") public String ROD_NAME = null;
|
|
||||||
@Option(shortName="RODTYPE", doc="Referenced Ordered Data type") public String ROD_TYPE = null;
|
|
||||||
|
|
||||||
/** Required main method implementation. */
|
|
||||||
public static void main(String[] argv) {
|
|
||||||
System.exit(new PrepareROD().instanceMain(argv));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected int doWork() {
|
|
||||||
|
|
||||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
|
||||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
|
|
||||||
GenomeLocParser.setupRefContigOrdering(refFile);
|
|
||||||
|
|
||||||
Class<? extends ReferenceOrderedDatum> rodClass = ReferenceOrderedData.Types.get(ROD_TYPE.toLowerCase()).type;
|
|
||||||
|
|
||||||
ReferenceOrderedData<? extends ReferenceOrderedDatum> rod = new ReferenceOrderedData("ROD", new File(ROD_FILE), rodClass );
|
|
||||||
try {
|
|
||||||
rod.validateFile();
|
|
||||||
} catch ( Exception e ) {
|
|
||||||
//System.out.println("Validation failure: " + e);
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<ReferenceOrderedDatum> rodData = rod.readAll();
|
|
||||||
System.out.printf("Read %d elements from %s%n", rodData.size(), ROD_FILE);
|
|
||||||
ReferenceOrderedData.sortRODDataInMemory(rodData);
|
|
||||||
try {
|
|
||||||
ReferenceOrderedData.write(rodData, new File(OUTPUT_FILE));
|
|
||||||
} catch ( IOException e ) {
|
|
||||||
//System.out.println("Validation failure: " + e);
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.printf("Validating output file %s%n", rodData.size(), OUTPUT_FILE);
|
|
||||||
ReferenceOrderedData outputRod = new ReferenceOrderedData("ROD", new File(OUTPUT_FILE), rodClass );
|
|
||||||
try {
|
|
||||||
outputRod.validateFile();
|
|
||||||
//outputRod.hasSameContents(ROD_FILE);
|
|
||||||
} catch ( Exception e ) {
|
|
||||||
//System.out.println("Validation failure: " + e);
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
<!--
|
|
||||||
~ Copyright (c) 2009 The Broad Institute
|
|
||||||
~
|
|
||||||
~ Permission is hereby granted, free of charge, to any person
|
|
||||||
~ obtaining a copy of this software and associated documentation
|
|
||||||
~ files (the "Software"), to deal in the Software without
|
|
||||||
~ restriction, including without limitation the rights to use,
|
|
||||||
~ copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
~ copies of the Software, and to permit persons to whom the
|
|
||||||
~ Software is furnished to do so, subject to the following
|
|
||||||
~ conditions:
|
|
||||||
~
|
|
||||||
~ The above copyright notice and this permission notice shall be
|
|
||||||
~ included in all copies or substantial portions of the Software.
|
|
||||||
~
|
|
||||||
~ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
~ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
~ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
~ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
~ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
~ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
~ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
~ OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
-->
|
|
||||||
|
|
||||||
<GATK-argument-collection>
|
|
||||||
<strictness-level>strict</strictness-level>
|
|
||||||
<intervals>/seq/references/HybSelOligos/whole_exome_refseq_coding/whole_exome_refseq_coding.targets.interval_list</intervals>
|
|
||||||
<reference-file>/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta</reference-file>
|
|
||||||
<enabled-threaded-IO>true</enabled-threaded-IO>
|
|
||||||
<unsafe>false</unsafe>
|
|
||||||
<disable-threading>false</disable-threading>
|
|
||||||
<number-of-threads>1</number-of-threads>
|
|
||||||
</GATK-argument-collection>
|
|
||||||
|
|
@ -1,74 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.somaticcoverage;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
|
||||||
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* a executable command line for the Somatic Coverage Walker.
|
|
||||||
*/
|
|
||||||
public class SomaticCoverageTool extends CommandLineExecutable {
|
|
||||||
// the two sam/bam files, one for cancer, one for normal
|
|
||||||
@Argument(fullName = "bam_file", shortName = "I", doc = "The bam files, one for the tumor one for the normal", required = true)
|
|
||||||
public List<File> samFiles = new ArrayList<File>();
|
|
||||||
|
|
||||||
/** Required main method implementation. */
|
|
||||||
public static void main( String[] argv ) {
|
|
||||||
try {
|
|
||||||
SomaticCoverageTool instance = new SomaticCoverageTool();
|
|
||||||
start(instance, argv);
|
|
||||||
} catch (Exception e) {
|
|
||||||
exitSystemWithError(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* a required method, returns the analysis name. This is usually the walker
|
|
||||||
* name with 'Walker' stripped off.
|
|
||||||
*
|
|
||||||
* @return the name of the analysis we're running
|
|
||||||
*/
|
|
||||||
protected String getAnalysisName() {
|
|
||||||
return "SomaticCoverage";
|
|
||||||
}
|
|
||||||
|
|
||||||
/** override any arguments we see fit. */
|
|
||||||
protected GATKArgumentCollection getArgumentCollection() {
|
|
||||||
GATKArgumentCollection argCollection = GATKArgumentCollection.unmarshal(getClass().getClassLoader().getResourceAsStream("SomaticCoverage.xml"));
|
|
||||||
argCollection.samFiles = samFiles;
|
|
||||||
return argCollection;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.io.File;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: depristo
|
|
||||||
* Date: Apr 23, 2009
|
|
||||||
* Time: 3:46:30 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class LogisticRegressor {
|
|
||||||
double[][] coefficients;
|
|
||||||
int nFeatures;
|
|
||||||
int order;
|
|
||||||
|
|
||||||
public LogisticRegressor(int nFeatures, int order) {
|
|
||||||
this.nFeatures = nFeatures;
|
|
||||||
this.order = order;
|
|
||||||
|
|
||||||
if ( nFeatures != 2 )
|
|
||||||
throw new IllegalArgumentException("LogisticRegressor currently only supports 2 features :-(");
|
|
||||||
|
|
||||||
// setup coefficient matrix
|
|
||||||
coefficients = new double[order+1][order+1];
|
|
||||||
for ( int i = 0; i <= order; i++ ) {
|
|
||||||
for ( int j = 0; j <= order; j++ ) {
|
|
||||||
coefficients[i][j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[][] getCoefficients() {
|
|
||||||
return coefficients;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCoefficient(int i, int j, double c) {
|
|
||||||
coefficients[i][j] = c;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double regress(double f1, double f2) {
|
|
||||||
double v = 0.0;
|
|
||||||
for ( int i = 0; i <= order; i++ ) {
|
|
||||||
for ( int j = 0; j <= order; j++ ) {
|
|
||||||
double c = coefficients[i][j];
|
|
||||||
v += c * Math.pow(f1,i) * Math.pow(f2, j);
|
|
||||||
//System.out.printf("i=%d, j=%d, v=%f, c=%f, f1=%f, f2=%f, f1^i=%f, f2^j=%f%n", i, j, v, c, f1, f2, Math.pow(f1,i), Math.pow(f2,j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder s = new StringBuilder();
|
|
||||||
s.append(String.format("nFeatures=%d, order=%d: ", nFeatures, order));
|
|
||||||
for ( int i = 0; i <= order; i++ ) {
|
|
||||||
for ( int j = 0; j <= order; j++ ) {
|
|
||||||
s.append(" " + coefficients[i][j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return s.toString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,286 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Hanging data off the reference sequence
|
|
||||||
* <p/>
|
|
||||||
* Supports in effect the following data structure
|
|
||||||
* <p/>
|
|
||||||
* <-- reference bases: A T G C -->
|
|
||||||
* d d d d
|
|
||||||
* d d d d
|
|
||||||
* d d d d
|
|
||||||
* d d d
|
|
||||||
* d d
|
|
||||||
* d
|
|
||||||
* <p/>
|
|
||||||
* Where the little d's are data associated with each position in the reference.
|
|
||||||
* Supports adding and removing data to either side of the data structure, as well as
|
|
||||||
* randomly accessing data anywhere within window.
|
|
||||||
*/
|
|
||||||
public class RefHanger<T> {
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// member fields
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
ArrayList<Hanger> hangers;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* our log, which we want to capture anything from this class
|
|
||||||
*/
|
|
||||||
private static Logger logger = Logger.getLogger(RefHanger.class);
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Info structure
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
public class Hanger {
|
|
||||||
public GenomeLoc loc = null;
|
|
||||||
public ArrayList<T> data = null;
|
|
||||||
|
|
||||||
public Hanger(GenomeLoc loc, ArrayList<T> data) {
|
|
||||||
this.loc = loc;
|
|
||||||
this.data = data;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final ArrayList<T> getData() {
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
public final int size() {
|
|
||||||
return this.data.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public final T get(int i) {
|
|
||||||
return this.data.get(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// constructors and other basic operations
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
public RefHanger() {
|
|
||||||
hangers = new ArrayList<Hanger>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
|
||||||
hangers.clear();
|
|
||||||
//System.out.printf("leftLoc is %s%n", getLeftLoc());
|
|
||||||
}
|
|
||||||
|
|
||||||
protected int getLeftOffset() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected int getRightOffset() {
|
|
||||||
//return hangers.size() - 1;
|
|
||||||
return isEmpty() ? 0 : hangers.size() - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected int getOffset(GenomeLoc loc) {
|
|
||||||
//System.out.printf("Loc: %s vs %s%n", loc, getLeftLoc());
|
|
||||||
return loc.minus(getLeftLoc());
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenomeLoc getLeftLoc() {
|
|
||||||
return hangers.get(getLeftOffset()).loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenomeLoc getRightLoc() {
|
|
||||||
return hangers.get(getRightOffset()).loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenomeLoc getLoc(int i) {
|
|
||||||
return hangers.get(i).loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasLocation(GenomeLoc loc) {
|
|
||||||
return !isEmpty() && loc.isBetween(getLeftLoc(), getRightLoc());
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
|
||||||
return hangers.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasHangers() {
|
|
||||||
return !isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Pops off the left most data from the structure
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Hanger popLeft() {
|
|
||||||
assert hasHangers();
|
|
||||||
return hangers.remove(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void dropLeft() {
|
|
||||||
popLeft();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks at the left most data from the structure
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Hanger getLeft() {
|
|
||||||
assert hasHangers();
|
|
||||||
return getHanger(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns data at offset relativePos
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Hanger getHanger(int relativePos) {
|
|
||||||
//assert hangers.contains(relativePos) : hangers + " " + relativePos;
|
|
||||||
return hangers.get(relativePos);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns data at GenomeLoc
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Hanger getHanger(GenomeLoc pos) {
|
|
||||||
return getHanger(getOffset(pos));
|
|
||||||
}
|
|
||||||
|
|
||||||
public int size() {
|
|
||||||
return hangers.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder s = new StringBuilder();
|
|
||||||
for ( int i = 0; i < size(); i++) {
|
|
||||||
s.append(String.format("%s => %s%n", getLoc(i), Utils.join(",", getHanger(i).data)));
|
|
||||||
}
|
|
||||||
return s.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Adding data to the left and right
|
|
||||||
//
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
public void pushLeft(GenomeLoc pos) {
|
|
||||||
pushLeft(pos, new ArrayList<T>());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushLeft(GenomeLoc pos, T datum) {
|
|
||||||
pushLeft(pos, new ArrayList<T>(Arrays.asList(datum)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushLeft(GenomeLoc pos, ArrayList<T> data) {
|
|
||||||
hangers.add(0, new Hanger(pos, data));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushLeft(GenomeLoc pos, List<T> data) {
|
|
||||||
hangers.add(0, new Hanger(pos, new ArrayList<T>(data)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushRight(GenomeLoc pos) {
|
|
||||||
pushRight(pos, new ArrayList<T>());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushRight(GenomeLoc pos, T datum) {
|
|
||||||
pushRight(pos, new ArrayList<T>(Arrays.asList(datum)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void pushRight(GenomeLoc pos, ArrayList<T> data) {
|
|
||||||
hangers.add(new Hanger(pos, data));
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean ensurePos(GenomeLoc pos) {
|
|
||||||
if (hasLocation(pos))
|
|
||||||
return true;
|
|
||||||
else {
|
|
||||||
pushRight(pos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void extendData(GenomeLoc pos, T datum) {
|
|
||||||
getHanger(pos).data.add(datum);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addData(List<GenomeLoc> positions, List<T> dataByPos) {
|
|
||||||
assert (positions.size() == dataByPos.size());
|
|
||||||
|
|
||||||
for (int i = 0; i < positions.size(); i++) {
|
|
||||||
GenomeLoc pos = positions.get(i);
|
|
||||||
T datum = dataByPos.get(i);
|
|
||||||
expandingPut1(pos, datum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addDataList(List<GenomeLoc> positions, List<List<T>> dataByPos) {
|
|
||||||
assert (positions.size() == dataByPos.size());
|
|
||||||
|
|
||||||
for (int i = 0; i < positions.size(); i++) {
|
|
||||||
GenomeLoc pos = positions.get(i);
|
|
||||||
for ( T datum : dataByPos.get(i) ) {
|
|
||||||
expandingPut1(pos, datum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void expandingPut1(final GenomeLoc loc, T datum) {
|
|
||||||
ensurePos(loc);
|
|
||||||
extendData(loc, datum);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void printState() {
|
|
||||||
logger.info("Hanger: ");
|
|
||||||
for (Hanger hanger : hangers) {
|
|
||||||
logger.info(String.format(" -> %s => %s:%n", hanger.loc, Utils.join("/", hanger.data)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Pushes locations on the right until we reach the expected position for pos.
|
|
||||||
* <p/>
|
|
||||||
* For example, if we have chr1:1 and 2 in the hanger, and we push 4 into the hangers
|
|
||||||
* this function will add 3 -> {} to the hanger too
|
|
||||||
*
|
|
||||||
* @param pos
|
|
||||||
* @param datum
|
|
||||||
*/
|
|
||||||
public void expandingPut(GenomeLoc pos, T datum) {
|
|
||||||
//System.out.printf("expandingPut(%s, %s)%n", pos, datum);
|
|
||||||
//printState();
|
|
||||||
if (isEmpty())
|
|
||||||
// we have nothing, just push right
|
|
||||||
pushRight(pos, datum);
|
|
||||||
else {
|
|
||||||
//assert pos.compareTo(getRightLoc()) == 1 : pos + " " + getRightLoc() + " => " + pos.compareTo(getRightLoc());
|
|
||||||
|
|
||||||
GenomeLoc nextRight = GenomeLocParser.nextLoc(getRightLoc());
|
|
||||||
while (pos.compareTo(nextRight) == 1) {
|
|
||||||
//printState();
|
|
||||||
//System.out.printf(" *** Extending %s, heading for %s%n", nextRight, pos);
|
|
||||||
ensurePos(nextRight);
|
|
||||||
nextRight = GenomeLocParser.nextLoc(nextRight);
|
|
||||||
}
|
|
||||||
|
|
||||||
ensurePos(pos);
|
|
||||||
extendData(pos, datum);
|
|
||||||
}
|
|
||||||
//printState();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.containers;
|
||||||
|
|
||||||
import java.util.PriorityQueue;
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public class BoundedScoringSet<E extends Comparable<E> > {
|
public class BoundedScoringSet<E extends Comparable<E> > {
|
||||||
private PriorityQueue<E> pq;
|
private PriorityQueue<E> pq;
|
||||||
private int maximumSize;
|
private int maximumSize;
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ import java.io.FileNotFoundException;
|
||||||
* An output stream that only initializes itself the first time its used.
|
* An output stream that only initializes itself the first time its used.
|
||||||
* Needs a callback that can create an output stream.
|
* Needs a callback that can create an output stream.
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class LazyFileOutputStream extends OutputStream {
|
public class LazyFileOutputStream extends OutputStream {
|
||||||
/**
|
/**
|
||||||
* Generates output files on demand.
|
* Generates output files on demand.
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ import java.io.IOException;
|
||||||
* A stream that allows redirection to a variety of sources transparently to the
|
* A stream that allows redirection to a variety of sources transparently to the
|
||||||
* user of the class.
|
* user of the class.
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class RedirectingOutputStream extends OutputStream {
|
public class RedirectingOutputStream extends OutputStream {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,217 +0,0 @@
|
||||||
// our package
|
|
||||||
package org.broadinstitute.sting.utils;
|
|
||||||
|
|
||||||
|
|
||||||
// the imports for unit testing.
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Basic unit test for RefHanger
|
|
||||||
*
|
|
||||||
* interface functions:
|
|
||||||
public RefHanger();
|
|
||||||
public void clear();
|
|
||||||
protected int getLeftOffset();
|
|
||||||
protected int getRightOffset();
|
|
||||||
protected int getOffset(GenomeLoc loc);
|
|
||||||
public GenomeLoc getLeftLoc();
|
|
||||||
public GenomeLoc getRightLoc();
|
|
||||||
public boolean hasLocation(GenomeLoc loc);
|
|
||||||
public boolean isEmpty();
|
|
||||||
public boolean hasHangers();
|
|
||||||
public Hanger popLeft();
|
|
||||||
public void dropLeft();
|
|
||||||
public Hanger getLeft();
|
|
||||||
public Hanger getHanger(int relativePos);
|
|
||||||
public Hanger getHanger(GenomeLoc pos);
|
|
||||||
public int size();
|
|
||||||
public void pushLeft(GenomeLoc pos);
|
|
||||||
public void pushLeft(GenomeLoc pos, T datum);
|
|
||||||
public void pushLeft(GenomeLoc pos, ArrayList<T> data);
|
|
||||||
public void pushRight(GenomeLoc pos);
|
|
||||||
public void pushRight(GenomeLoc pos, T datum);
|
|
||||||
public void pushRight(GenomeLoc pos, ArrayList<T> data);
|
|
||||||
public boolean ensurePos(GenomeLoc pos);
|
|
||||||
public void extendData(GenomeLoc pos, T datum);
|
|
||||||
public void addData(List<GenomeLoc> positions, List<T> dataByPos);
|
|
||||||
public void expandingPut1(final GenomeLoc loc, T datum);
|
|
||||||
public void printState();
|
|
||||||
public void expandingPut(GenomeLoc pos, T datum);
|
|
||||||
|
|
||||||
*/
|
|
||||||
public class RefHangerTest extends BaseTest {
|
|
||||||
private static ReferenceSequenceFile seq;
|
|
||||||
private GenomeLoc startLoc;
|
|
||||||
private RefHanger<Integer> emptyHanger;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* chrM:1 is the start.
|
|
||||||
* 1: 1 2 3
|
|
||||||
* 2: 4 5
|
|
||||||
* 3: 6
|
|
||||||
* 4: 7 8
|
|
||||||
* 5: 9 10
|
|
||||||
*/
|
|
||||||
private static RefHanger<Integer> filledHanger;
|
|
||||||
private static List<Integer> l1, l2, l3, l4, l5;
|
|
||||||
private static GenomeLoc p1, p2, p3, p4, p5;
|
|
||||||
|
|
||||||
@BeforeClass
|
|
||||||
public static void init() throws FileNotFoundException {
|
|
||||||
// sequence
|
|
||||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
|
||||||
|
|
||||||
System.out.printf("Filled hanger is %n%s%n", makeFilledHanger());
|
|
||||||
}
|
|
||||||
|
|
||||||
private static RefHanger makeFilledHanger() {
|
|
||||||
RefHanger<Integer> filledHanger = new RefHanger<Integer>();
|
|
||||||
l1 = Arrays.asList(1, 2, 3);
|
|
||||||
l2 = Arrays.asList(4, 5);
|
|
||||||
l3 = Arrays.asList(6);
|
|
||||||
l4 = Arrays.asList(7, 8);
|
|
||||||
l5 = Arrays.asList(9, 10);
|
|
||||||
p1 = GenomeLocParser.createGenomeLoc(0, 1, 1);
|
|
||||||
p2 = GenomeLocParser.nextLoc(p1);
|
|
||||||
p3 = GenomeLocParser.nextLoc(p2);
|
|
||||||
p4 = GenomeLocParser.nextLoc(p3);
|
|
||||||
p5 = GenomeLocParser.nextLoc(p4);
|
|
||||||
|
|
||||||
filledHanger.addDataList(Arrays.asList(p1, p2, p3, p4, p5),
|
|
||||||
Arrays.asList(l1, l2, l3, l4, l5));
|
|
||||||
return filledHanger;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setupHanger() {
|
|
||||||
startLoc = GenomeLocParser.createGenomeLoc(0, 1, 1); // chrM 1
|
|
||||||
emptyHanger = new RefHanger<Integer>();
|
|
||||||
filledHanger = makeFilledHanger();
|
|
||||||
|
|
||||||
//System.out.printf("Filled hanger is %n%s%n", filledHanger);
|
|
||||||
|
|
||||||
//GenomeLoc two = new GenomeLoc();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that we got a string parameter in correctly
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testEmpty() {
|
|
||||||
logger.warn("Executing testEmpty");
|
|
||||||
assertTrue(emptyHanger.size() == 0);
|
|
||||||
assertTrue(emptyHanger.getLeftOffset() == 0);
|
|
||||||
assertTrue(emptyHanger.getRightOffset() == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test (expected=IndexOutOfBoundsException.class )
|
|
||||||
public void testEmptyGet() {
|
|
||||||
logger.warn("Executing testEmptyGet");
|
|
||||||
emptyHanger.getHanger(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFilledFilling() {
|
|
||||||
logger.warn("Executing testFilledFilling");
|
|
||||||
testBaseFilledHanger();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void testBaseFilledHanger() {
|
|
||||||
assertTrue(filledHanger.size() == 5);
|
|
||||||
assertTrue(! filledHanger.isEmpty());
|
|
||||||
assertTrue(filledHanger.hasHangers());
|
|
||||||
assertTrue(filledHanger.getLeftOffset() == 0);
|
|
||||||
assertTrue(filledHanger.getRightOffset() == 4);
|
|
||||||
assertTrue(filledHanger.getOffset(p1) == 0);
|
|
||||||
assertTrue(filledHanger.getOffset(p2) == 1);
|
|
||||||
assertTrue(filledHanger.getOffset(p3) == 2);
|
|
||||||
assertTrue(filledHanger.getOffset(p4) == 3);
|
|
||||||
assertTrue(filledHanger.getOffset(p5) == 4);
|
|
||||||
assertTrue(filledHanger.getLeftLoc().compareTo(p1) == 0);
|
|
||||||
assertTrue(filledHanger.getRightLoc().compareTo(p5) == 0);
|
|
||||||
assertTrue(filledHanger.getLoc(2).compareTo(p3) == 0);
|
|
||||||
assertTrue(filledHanger.hasLocation(p1));
|
|
||||||
assertTrue(filledHanger.hasLocation(p2));
|
|
||||||
assertTrue(filledHanger.hasLocation(p3));
|
|
||||||
assertTrue(filledHanger.hasLocation(p4));
|
|
||||||
assertTrue(filledHanger.hasLocation(p5));
|
|
||||||
assertTrue(! filledHanger.hasLocation(GenomeLocParser.createGenomeLoc(0, 6, 6)));
|
|
||||||
|
|
||||||
assertTrue(filledHanger.getHanger(0) != null);
|
|
||||||
assertTrue(filledHanger.getHanger(1) != null);
|
|
||||||
assertTrue((int)(Integer)filledHanger.getHanger(0).data.get(0) == 1);
|
|
||||||
assertTrue((int)(Integer)filledHanger.getHanger(1).data.get(0) == 4);
|
|
||||||
assertTrue((int)(Integer)filledHanger.getHanger(p1).data.get(0) == 1);
|
|
||||||
assertTrue((int)(Integer)filledHanger.getHanger(p2).data.get(0) == 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testClear() {
|
|
||||||
logger.warn("Executing testClear");
|
|
||||||
assertTrue(filledHanger.size() == 5);
|
|
||||||
assertTrue(! filledHanger.isEmpty());
|
|
||||||
assertTrue(filledHanger.hasHangers());
|
|
||||||
|
|
||||||
filledHanger.clear();
|
|
||||||
|
|
||||||
assertTrue(filledHanger.size() == 0);
|
|
||||||
assertTrue(filledHanger.isEmpty());
|
|
||||||
assertTrue(! filledHanger.hasHangers());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testPopLeft() {
|
|
||||||
logger.warn("Executing testPopLeft");
|
|
||||||
filledHanger.popLeft();
|
|
||||||
assertTrue(filledHanger.size() == 4);
|
|
||||||
assertTrue(filledHanger.getRightOffset() == 3);
|
|
||||||
assertTrue(filledHanger.getOffset(p2) == 0);
|
|
||||||
assertTrue(filledHanger.getOffset(p3) == 1);
|
|
||||||
assertTrue(filledHanger.getOffset(p4) == 2);
|
|
||||||
assertTrue(filledHanger.getOffset(p5) == 3);
|
|
||||||
assertTrue(! filledHanger.hasLocation(p1));
|
|
||||||
assertTrue(filledHanger.hasLocation(p2));
|
|
||||||
assertTrue(filledHanger.hasLocation(p3));
|
|
||||||
assertTrue(filledHanger.hasLocation(p4));
|
|
||||||
assertTrue(filledHanger.hasLocation(p5));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDoublePopLeft() {
|
|
||||||
logger.warn("Executing testDoublePopLeft");
|
|
||||||
filledHanger.popLeft();
|
|
||||||
filledHanger.popLeft();
|
|
||||||
assertTrue(filledHanger.size() == 3);
|
|
||||||
assertTrue(filledHanger.getRightOffset() == 2);
|
|
||||||
assertTrue(filledHanger.getOffset(p3) == 0);
|
|
||||||
assertTrue(filledHanger.getOffset(p4) == 1);
|
|
||||||
assertTrue(filledHanger.getOffset(p5) == 2);
|
|
||||||
assertTrue(! filledHanger.hasLocation(p1));
|
|
||||||
assertTrue(! filledHanger.hasLocation(p2));
|
|
||||||
assertTrue(filledHanger.hasLocation(p3));
|
|
||||||
assertTrue(filledHanger.hasLocation(p4));
|
|
||||||
assertTrue(filledHanger.hasLocation(p5));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testPopPushLeft() {
|
|
||||||
logger.warn("Executing testPopPushLeft");
|
|
||||||
filledHanger.popLeft();
|
|
||||||
filledHanger.pushLeft(p1, l1);
|
|
||||||
testBaseFilledHanger();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue