Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-09-29 07:40:08 -04:00
commit 9e32ede5b2
15 changed files with 547 additions and 287 deletions

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Iterator;
import java.util.List;
@ -37,7 +38,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
/**
* The data source for reads. Will probably come directly from the BAM file.
*/
private final Iterator<AlignmentContext> sourceIterator;
private final PeekableIterator<AlignmentContext> sourceIterator;
/**
* Stores the sequence of intervals that the windowmaker should be tracking.
@ -69,7 +70,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData);
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
@ -100,11 +101,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private final GenomeLoc locus;
/**
* Signal not to advance the iterator because we're currently sitting at the next element.
*/
private boolean atNextElement = false;
public WindowMakerIterator(GenomeLoc locus) {
this.locus = locus;
advance();
@ -124,58 +120,45 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
public boolean hasNext() {
advance();
return atNextElement;
return currentAlignmentContext != null;
}
public AlignmentContext next() {
advance();
if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
// Prepare object state for no next element.
// Consume this alignment context.
AlignmentContext toReturn = currentAlignmentContext;
currentAlignmentContext = null;
atNextElement = false;
// Return the current element.
return toReturn;
}
private void advance() {
// No shard boundaries specified. If currentAlignmentContext has been consumed, grab the next one.
if(locus == null) {
if(!atNextElement && sourceIterator.hasNext()) {
currentAlignmentContext = sourceIterator.next();
atNextElement = true;
}
return;
}
// Can't possibly find another element. Skip out early.
if(currentAlignmentContext == null && !sourceIterator.hasNext())
return;
// Need to find the next element that is not past shard boundaries. If we travel past the edge of
// shard boundaries, stop and let the next interval pick it up.
while(sourceIterator.hasNext()) {
// Seed the current alignment context first time through the loop.
if(currentAlignmentContext == null)
currentAlignmentContext = sourceIterator.next();
while(currentAlignmentContext == null && sourceIterator.hasNext()) {
// Advance the iterator and try again.
AlignmentContext candidateAlignmentContext = sourceIterator.peek();
// Found a match.
if(locus.containsP(currentAlignmentContext.getLocation())) {
atNextElement = true;
if(locus == null) {
// No filter present. Return everything that LocusIteratorByState provides us.
currentAlignmentContext = sourceIterator.next();
}
else if(locus.isPast(candidateAlignmentContext.getLocation()))
// Found a locus before the current window; claim this alignment context and throw it away.
sourceIterator.next();
else if(locus.containsP(candidateAlignmentContext.getLocation())) {
// Found a locus within the current window; claim this alignment context and call it the next entry.
currentAlignmentContext = sourceIterator.next();
}
else if(locus.isBefore(candidateAlignmentContext.getLocation())) {
// Whoops. Skipped past the end of the region. Iteration for this window is complete. Do
// not claim this alignment context in case it is part of the next shard.
break;
}
// Whoops. Skipped past the end of the region. Iteration for this window is complete.
if(locus.isBefore(currentAlignmentContext.getLocation()))
break;
// No more elements to examine. Iteration is complete.
if(!sourceIterator.hasNext())
break;
// Advance the iterator and try again.
currentAlignmentContext = sourceIterator.next();
else
throw new ReviewedStingException("BUG: filtering locus does not contain, is not before, and is not past the given alignment context");
}
}
}

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.indexer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
/**
 * A utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
 * in which an index is located, or if you'd like to pre-index files to save time.
 */
public class RMDIndexer extends CommandLineProgram {
    @Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
    File inputFileSource = null;

    @Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
    String inputFileType = null;

    @Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
    public File referenceFile = null;

    @Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
    public File indexFile = null;

    @Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
    IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;

    private static Logger logger = Logger.getLogger(RMDIndexer.class);

    // Reference sequence and location parser; both are initialized inside execute().
    private IndexedFastaSequenceFile ref = null;
    private GenomeLocParser genomeLocParser = null;

    /**
     * Validates the command-line arguments, builds a codec for the requested file type,
     * creates a Tribble index for the input file, and writes it to {@code indexFile}.
     *
     * @return 0 on success
     * @throws Exception if the arguments are invalid or the index cannot be created/written
     */
    @Override
    protected int execute() throws Exception {
        // check parameters
        // ---------------------------------------------------------------------------------
        // check the input parameters
        if (referenceFile != null && !referenceFile.canRead())
            throw new IllegalArgumentException("We can't read the reference file: "
                    + referenceFile + ", check that it exists, and that you have permissions to read it");

        // set the index file to the default name if they didn't specify a file
        if (indexFile == null && inputFileSource != null)
            indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);

        // check that we can create the output file; refuse to clobber an existing index
        if (indexFile == null || indexFile.exists())
            throw new IllegalArgumentException("We can't write to the index file location: "
                    + indexFile + ", the index exists");

        logger.info(String.format("attempting to index file: %s", inputFileSource));
        logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
        logger.info(String.format("using type: %s", inputFileType));
        logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));

        // try to index the file
        // ---------------------------------------------------------------------------------
        // setup the reference
        ref = new CachingIndexedFastaSequenceFile(referenceFile);
        genomeLocParser = new GenomeLocParser(ref);

        // get a track builder
        RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);

        // find the types available to the track builders
        FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);

        // check that the type is valid
        if (descriptor == null)
            throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());

        // create the codec
        FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);

        // check if it's a reference dependent feature codec
        if (codec instanceof ReferenceDependentFeatureCodec)
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);

        // get some timing info
        long currentTime = System.currentTimeMillis();
        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);

        // add writing of the sequence dictionary, if supplied
        builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());

        // create the output stream, and write the index.  Close the stream in a finally
        // block so the file handle isn't leaked if writing throws.
        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
        try {
            index.write(stream);
        } finally {
            stream.close();
        }

        // report and exit
        logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
        return 0; // return successfully
    }

    /**
     * the generic call execute main
     * @param argv the arguments from the command line
     */
    public static void main(String[] argv) {
        try {
            RMDIndexer instance = new RMDIndexer();
            start(instance, argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }
}

View File

@ -42,7 +42,7 @@ import java.util.List;
import java.util.Map;
/**
* List all of the samples in the info field
* List all of the polymorphic samples.
*/
public class SampleList extends InfoFieldAnnotation {

View File

@ -81,7 +81,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,UAC.OUTPUT_DEBUG_INDEL_INFO);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
@ -100,10 +101,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
GenomeLoc loc = ref.getLocus();
ArrayList<Allele> aList = new ArrayList<Allele>();
if (DEBUG) {
System.out.println("'''''''''''''''''''''");
System.out.println("Loc:"+loc.toString());
}
HashMap<String,Integer> consensusIndelStrings = new HashMap<String,Integer>();
int insCount = 0, delCount = 0;
@ -137,12 +134,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
continue;
}
if (DEBUG && p.isIndel()) {
/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
*/
String indelString = p.getEventBases();
if (p.isInsertion()) {
@ -212,7 +209,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
if (DEBUG) {
/* if (DEBUG) {
int icount = indelPileup.getNumberOfInsertions();
int dcount = indelPileup.getNumberOfDeletions();
if (icount + dcount > 0)
@ -226,7 +223,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
System.out.println();
}
}
} */
}
int maxAlleleCnt = 0;
@ -237,8 +234,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
maxAlleleCnt = curCnt;
bestAltAllele = s;
}
if (DEBUG)
System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
// if (DEBUG)
// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
} //gdebug-
if (maxAlleleCnt < minIndelCountForGenotyping)
@ -366,10 +363,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
if (DEBUG)
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
//System.out.println(eventLength);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);

View File

@ -153,6 +153,9 @@ public class UnifiedArgumentCollection {
// @Hidden
// @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NOT USE")
// public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
@Hidden
@Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
public boolean BANDED_INDEL_COMPUTATION = false;
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
@ -199,7 +202,7 @@ public class UnifiedArgumentCollection {
// todo- arguments to remove
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
return uac;
}

View File

@ -32,6 +32,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -39,16 +40,12 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
/*import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Covariate;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDataManager;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalDatum;
import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.RecalibrationArgumentCollection;
*/
public class PairHMMIndelErrorModel {
public static final int BASE_QUAL_THRESHOLD = 20;
@ -57,6 +54,7 @@ public class PairHMMIndelErrorModel {
private final double logGapContinuationProbability;
private boolean DEBUG = false;
private boolean bandedLikelihoods = false;
private static final int MAX_CACHED_QUAL = 127;
@ -95,11 +93,11 @@ public class PairHMMIndelErrorModel {
}
}
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) {
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean bandedLikelihoods) {
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
this.DEBUG = deb;
this.bandedLikelihoods = bandedLikelihoods;
// fill gap penalty table, affine naive model:
this.GAP_CONT_PROB_TABLE = new double[MAX_HRUN_GAP_IDX];
@ -164,8 +162,35 @@ public class PairHMMIndelErrorModel {
}
/**
 * Updates a single cell (indI, indJ) of the three pair-HMM dynamic-programming matrices:
 * match, X (gap in the haplotype) and Y (gap in the read).  Boundary cells (row 0 and
 * column 0) are initialized by the caller, so this method only touches interior cells.
 *
 * @param indI             row index into the DP matrices (read position + 1)
 * @param indJ             column index into the DP matrices (haplotype position + 1)
 * @param X_METRIC_LENGTH  number of rows (readBases.length + 1)
 * @param Y_METRIC_LENGTH  number of columns (haplotypeBases.length + 1)
 * @param readBases        read bases being aligned
 * @param readQuals        per-base qualities for the read; clamped to [1, MAX_CACHED_QUAL]
 * @param haplotypeBases   haplotype bases being aligned against
 * @param currentGOP       per-position log gap-open penalties for this haplotype
 * @param currentGCP       per-position log gap-continuation penalties for this haplotype
 * @param matchMetricArray match-state log-likelihood matrix (updated in place)
 * @param XMetricArray     X-state log-likelihood matrix (updated in place)
 * @param YMetricArray     Y-state log-likelihood matrix (updated in place)
 */
private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
double[] currentGOP, double[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
// Only interior cells are computed; row/column 0 hold boundary conditions set by the caller.
if (indI > 0 && indJ > 0) {
final int im1 = indI -1;
final int jm1 = indJ - 1;
// update current point: match/mismatch emission from the cached probability tables,
// with the base quality clamped to the cached range [1, MAX_CACHED_QUAL]
final byte x = readBases[im1];
final byte y = haplotypeBases[jm1];
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
// update X array: in the last column the gap penalty is replaced by END_GAP_COST so
// trailing gaps are not penalized with the regular open/continuation costs
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
// update Y array: symmetric treatment for the last row
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
}
}
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
double[] currentGOP, double[] currentGCP) {
double[] currentGOP, double[] currentGCP, int eventLength) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
@ -175,62 +200,153 @@ public class PairHMMIndelErrorModel {
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
final double DIAG_TOL = 20; // means that the max - min element in a diagonal has to differ by more than this number for banding to take effect.
// default initialization for all arrays
for (int i=0; i < X_METRIC_LENGTH; i++) {
Arrays.fill(matchMetricArray[i],Double.NEGATIVE_INFINITY);
Arrays.fill(YMetricArray[i],Double.NEGATIVE_INFINITY);
Arrays.fill(XMetricArray[i],Double.NEGATIVE_INFINITY);
}
for (int i=1; i < X_METRIC_LENGTH; i++) {
//initialize first column
matchMetricArray[i][0] = Double.NEGATIVE_INFINITY;
YMetricArray[i][0] = Double.NEGATIVE_INFINITY;
XMetricArray[i][0] = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
XMetricArray[i][0] = END_GAP_COST*(i);
}
for (int j=1; j < Y_METRIC_LENGTH; j++) {
// initialize first row
matchMetricArray[0][j] = Double.NEGATIVE_INFINITY;
XMetricArray[0][j] = Double.NEGATIVE_INFINITY;
YMetricArray[0][j] = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
YMetricArray[0][j] = END_GAP_COST*(j);
}
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
XMetricArray[0][0]= YMetricArray[0][0] = 0;
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
final int im1 = indI-1;
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
final int jm1 = indJ-1;
final byte x = readBases[im1];
final byte y = haplotypeBases[jm1];
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
double bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead,
XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
matchMetricArray[indI][indJ] = bestMetric;
final int numDiags = X_METRIC_LENGTH + Y_METRIC_LENGTH -1;
final int elemsInDiag = Math.min(X_METRIC_LENGTH, Y_METRIC_LENGTH);
// update X array
// State X(i,j): X(1:i) aligned to a gap in Y(1:j).
// When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
int idxWithMaxElement = 0;
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
XMetricArray[indI][indJ] = bestMetric;
double maxElementInDiag = Double.NEGATIVE_INFINITY;
// update Y array
//c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
//d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
YMetricArray[indI][indJ] = bestMetric;
for (int diag=0; diag < numDiags; diag++) {
// compute default I and J start positions at edge of diagonals
int indI = 0;
int indJ = diag;
if (diag >= Y_METRIC_LENGTH ) {
indI = diag-(Y_METRIC_LENGTH-1);
indJ = Y_METRIC_LENGTH-1;
}
// first pass: from max element to edge
int idxLow = bandedLikelihoods? idxWithMaxElement : 0;
// reset diag max value before starting
if (bandedLikelihoods) {
maxElementInDiag = Double.NEGATIVE_INFINITY;
// set indI, indJ to correct values
indI += idxLow;
indJ -= idxLow;
if (indI >= X_METRIC_LENGTH || indJ < 0) {
idxLow--;
indI--;
indJ++;
}
}
for (int el = idxLow; el < elemsInDiag; el++) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
// update max in diagonal
if (bandedLikelihoods) {
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
// check if we've fallen off diagonal value by threshold
if (bestMetric > maxElementInDiag) {
maxElementInDiag = bestMetric;
idxWithMaxElement = el;
}
else if (bestMetric < maxElementInDiag - DIAG_TOL)
break; // done w/current diagonal
}
indI++;
if (indI >=X_METRIC_LENGTH )
break;
indJ--;
if (indJ <= 0)
break;
}
if (bandedLikelihoods && idxLow > 0) {
// now do second part in opposite direction
indI = 0;
indJ = diag;
if (diag >= Y_METRIC_LENGTH ) {
indI = diag-(Y_METRIC_LENGTH-1);
indJ = Y_METRIC_LENGTH-1;
}
indI += idxLow-1;
indJ -= idxLow-1;
for (int el = idxLow-1; el >= 0; el--) {
updateCell(indI, indJ, X_METRIC_LENGTH, Y_METRIC_LENGTH, readBases, readQuals, haplotypeBases,
currentGOP, currentGCP, matchMetricArray, XMetricArray, YMetricArray);
// update max in diagonal
final double bestMetric = MathUtils.max(matchMetricArray[indI][indJ], XMetricArray[indI][indJ], YMetricArray[indI][indJ]);
// check if we've fallen off diagonal value by threshold
if (bestMetric > maxElementInDiag) {
maxElementInDiag = bestMetric;
idxWithMaxElement = el;
}
else if (bestMetric < maxElementInDiag - DIAG_TOL)
break; // done w/current diagonal
indJ++;
if (indJ >= Y_METRIC_LENGTH )
break;
indI--;
if (indI <= 0)
break;
}
}
// if (DEBUG)
// System.out.format("Max:%4.1f el:%d\n",maxElementInDiag, idxWithMaxElement);
}
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
if (DEBUG)
System.out.format("Likelihood: %5.4f\n", bestMetric);
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
/*
if (DEBUG) {
PrintStream outx, outy, outm, outs;
double[][] sumMetrics = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
try {
outx = new PrintStream("../../UGOptim/datax.txt");
outy = new PrintStream("../../UGOptim/datay.txt");
outm = new PrintStream("../../UGOptim/datam.txt");
outs = new PrintStream("../../UGOptim/datas.txt");
double metrics[] = new double[3];
for (int indI=0; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=0; indJ < Y_METRIC_LENGTH; indJ++) {
metrics[0] = matchMetricArray[indI][indJ];
metrics[1] = XMetricArray[indI][indJ];
metrics[2] = YMetricArray[indI][indJ];
//sumMetrics[indI][indJ] = MathUtils.softMax(metrics);
outx.format("%4.1f ", metrics[1]);
outy.format("%4.1f ", metrics[2]);
outm.format("%4.1f ", metrics[0]);
outs.format("%4.1f ", MathUtils.softMax(metrics));
}
outx.println(); outm.println();outy.println(); outs.println();
}
outm.close(); outx.close(); outy.close();
} catch (java.io.IOException e) { throw new UserException("bla");}
}
*/
return bestMetric;
@ -262,11 +378,6 @@ public class PairHMMIndelErrorModel {
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
LinkedHashMap<Allele,double[]> gapContProbabilityMap = new LinkedHashMap<Allele,double[]>();
if (DEBUG) {
System.out.println("Reference bases:");
System.out.println(new String(ref.getBases()));
}
// fill context-dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
// todo -- refactor into separate function
for (Allele a: haplotypeMap.keySet()) {
@ -278,13 +389,6 @@ public class PairHMMIndelErrorModel {
// get homopolymer length profile for current haplotype
int[] hrunProfile = new int[haplotypeBases.length];
getContextHomopolymerLength(haplotypeBases,hrunProfile);
if (DEBUG) {
System.out.println("Haplotype bases:");
System.out.println(new String(haplotypeBases));
for (int i=0; i < hrunProfile.length; i++)
System.out.format("%d",hrunProfile[i]);
System.out.println();
}
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
@ -414,18 +518,16 @@ public class PairHMMIndelErrorModel {
// ok, we now figured out total number of clipped bases on both ends.
// Figure out where we want to place the haplotype to score read against
if (DEBUG)
System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
/*
if (DEBUG)
System.out.format("numStartClippedBases: %d numEndClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
numStartClippedBases, numEndClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), start, stop, read.getReadLength());
*/
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
if (numStartClippedBases + numEndClippedBases >= unclippedReadBases.length) {
if (DEBUG)
System.out.println("BAD READ!!");
int j=0;
for (Allele a: haplotypeMap.keySet()) {
readEl.put(a,0.0);
@ -440,13 +542,6 @@ public class PairHMMIndelErrorModel {
byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases,
unclippedReadBases.length-numEndClippedBases);
double[] recalCDP = null;
if (DEBUG) {
System.out.println("Read bases:");
System.out.println(new String(readBases));
}
int j=0;
for (Allele a: haplotypeMap.keySet()) {
@ -465,14 +560,10 @@ public class PairHMMIndelErrorModel {
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(),
(int)indStart, (int)indStop);
if (DEBUG) {
System.out.println("Haplotype to test:");
System.out.println(new String(haplotypeBases));
}
final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals,
currentContextGOP, currentContextGCP, eventLength);
readEl.put(a,readLikelihood);
readLikelihoods[readIdx][j++] = readLikelihood;

View File

@ -159,10 +159,11 @@ public class CycleCovariate implements StandardCovariate {
*/
// todo -- this should be put into a common place in the code base
private static List<String> PACBIO_NAMES = Arrays.asList("PACBIO");
private static List<String> ILLUMINA_NAMES = Arrays.asList("ILLUMINA", "SLX", "SOLEXA");
private static List<String> SOLID_NAMES = Arrays.asList("SOLID");
private static List<String> LS454_NAMES = Arrays.asList("454");
private static List<String> COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE");
private static List<String> PACBIO_NAMES = Arrays.asList("PACBIO");
private static boolean isPlatform(SAMRecord read, List<String> names) {
String pl = read.getReadGroup().getPlatform().toUpperCase();
@ -176,11 +177,10 @@ public class CycleCovariate implements StandardCovariate {
public void getValues(SAMRecord read, Comparable[] comparable) {
//-----------------------------
// ILLUMINA and SOLID
// Illumina, Solid, PacBio, and Complete Genomics
//-----------------------------
if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES)) {
if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES) || isPlatform(read, COMPLETE_GENOMICS_NAMES) ) {
final int init;
final int increment;
if( !read.getReadNegativeStrandFlag() ) {
@ -222,6 +222,11 @@ public class CycleCovariate implements StandardCovariate {
cycle += increment;
}
}
//-----------------------------
// 454
//-----------------------------
else if ( isPlatform(read, LS454_NAMES) ) { // Some bams have "LS454" and others have just "454"
final int readLength = read.getReadLength();

View File

@ -245,8 +245,7 @@ public class RecalDataManager {
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() +
" Users must set both the default read group using the --default_read_group <String> argument and the default platform using the --default_platform <String> argument." );
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() );
}
}
@ -271,8 +270,7 @@ public class RecalDataManager {
}
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
} else {
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() +
" Users must set the default platform using the --default_platform <String> argument." );
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() );
}
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
/**
* Created by IntelliJ IDEA.
@ -41,22 +42,29 @@ public class RecalibrationArgumentCollection {
//////////////////////////////////
// Shared Command Line Arguments
//////////////////////////////////
// NOTE(review): the following arguments are all @Hidden -- excluded from the standard
// help output; they appear intended for developer/debug use rather than end users.
@Hidden
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
public String DEFAULT_READ_GROUP = null;
@Hidden
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@Hidden
@Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
public String FORCE_READ_GROUP = null;
@Hidden
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
public int WINDOW_SIZE = 5;
/**
 * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
 */
// NOTE(review): the javadoc above describes WINDOW_SIZE (the NQS window) but is
// positioned on HOMOPOLYMER_NBACK -- consider moving it up a declaration.
@Hidden
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
public int HOMOPOLYMER_NBACK = 7;
@Hidden
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
public boolean EXCEPTION_IF_NO_TILE = false;

View File

@ -0,0 +1,56 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import java.util.Comparator;
/**
*
* @author Mauricio Carneiro
* @since 9/28/11
*/
public class GenomeLocComparator implements Comparator<GenomeLoc> {
    /**
     * Compares the contigs of two genome locs by contig index.
     *
     * @param gl1 the first genome loc
     * @param gl2 the second genome loc
     * @return 0 if the contig indices are equal, 1 if gl1's index is greater, -1 if gl2's is greater
     */
    @Requires("gl2 != null")
    @Ensures("result == 0 || result == 1 || result == -1")
    public final int compareContigs( GenomeLoc gl1, GenomeLoc gl2 ) {
        if (gl1.contigIndex == gl2.contigIndex)
            return 0;
        else if (gl1.contigIndex > gl2.contigIndex)
            return 1;
        return -1;
    }

    /**
     * Orders genome locs by contig index, then by start position. Unmapped locs sort
     * after all mapped locs; two unmapped locs compare as equal.
     *
     * @param gl1 the first genome loc
     * @param gl2 the second genome loc
     * @return -1 if gl1 orders first, 1 if gl2 orders first, 0 if equivalent
     */
    @Requires("gl2 != null")
    @Ensures("result == 0 || result == 1 || result == -1")
    public int compare ( GenomeLoc gl1, GenomeLoc gl2 ) {
        int result = 0;
        if ( gl1 == gl2 ) {
            result = 0;
        }
        else if(GenomeLoc.isUnmapped(gl1))
            // BUG FIX: when BOTH locs are unmapped they must compare as equal.
            // Previously this branch returned 1 unconditionally, so for two distinct
            // unmapped locs compare(a,b) and compare(b,a) were both 1, violating the
            // Comparator antisymmetry contract (sorts may throw
            // "Comparison method violates its general contract!").
            result = GenomeLoc.isUnmapped(gl2) ? 0 : 1;
        else if(GenomeLoc.isUnmapped(gl2))
            result = -1;
        else {
            final int cmpContig = compareContigs(gl1, gl2);
            if ( cmpContig != 0 ) {
                result = cmpContig;
            } else {
                // Same contig: order by start position only (stop is not consulted).
                if ( gl1.getStart() < gl2.getStart() ) result = -1;
                if ( gl1.getStart() > gl2.getStart() ) result = 1;
            }
        }
        return result;
    }
}

View File

@ -68,7 +68,7 @@ public class ReadClipper {
return result;
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
@ -93,8 +93,9 @@ public class ReadClipper {
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (left == right)
return new SAMRecord(read.getHeader());
this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
}
public SAMRecord hardClipLowQualEnds(byte lowQual) {

View File

@ -325,11 +325,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
return Arrays.toString((Object[])value);
else
throw new RuntimeException("Unexpected array type in prettyPrintValue. Value was " + value + " type is " + type);
} else if ( RodBinding.class.isAssignableFrom(value.getClass() ) )
} else if ( RodBinding.class.isAssignableFrom(value.getClass() ) ) {
// annoying special case to handle the UnBound() constructor
return "none";
return value.toString();
} else if ( value instanceof String ) {
return value.equals("") ? "\"\"" : value;
} else {
return value.toString();
}
}
/**
@ -432,11 +435,17 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
* Returns a Pair of (main, synonym) names for argument with fullName s1 and
* shortName s2. The main is selected to be the longest of the two, provided
* it doesn't exceed MAX_DISPLAY_NAME, in which case the shorter is taken.
* @param s1
* @param s2
* @return
*
* @param s1 the short argument name without -, or null if not provided
* @param s2 the long argument name without --, or null if not provided
* @return A pair of fully qualified names (with - or --) for the argument. The first
* element is the primary display name while the second (potentially null) is a
* synonymous name.
*/
Pair<String, String> displayNames(String s1, String s2) {
s1 = s1 == null ? null : "-" + s1;
s2 = s2 == null ? null : "--" + s2;
if ( s1 == null ) return new Pair<String, String>(s2, null);
if ( s2 == null ) return new Pair<String, String>(s1, null);
@ -510,7 +519,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
*/
protected Map<String, Object> docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) {
Map<String, Object> root = new HashMap<String, Object>();
Pair<String, String> names = displayNames("-" + def.shortName, "--" + def.fullName);
Pair<String, String> names = displayNames(def.shortName, def.fullName);
root.put("name", names.getFirst() );

View File

@ -825,7 +825,7 @@ public class ReadUtils {
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
@ -893,6 +893,10 @@ public class ReadUtils {
// base before the deletion (see warning in function contracts)
else if (fallsInsideDeletion && !endsWithinCigar)
readBases += shift - 1;
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
else if (fallsInsideDeletion && endsWithinCigar)
readBases--;
}
}

View File

@ -118,6 +118,20 @@ public abstract class LocusViewTemplate extends BaseTest {
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
public void readAndLocusOverlapAtLastBase() {
    // A 5bp read starting at chr1:1 whose final base lands exactly on the
    // single-base interval chr1:5-5.
    SAMRecord overlappingRead = buildSAMRecord("chr1", 1, 5);
    SAMRecordIterator readSource = new SAMRecordIterator(overlappingRead);

    Shard locusShard = new MockLocusShard(genomeLocParser,
            Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));

    // Build the window over the shard and pull its (single) iterator.
    WindowMaker maker = new WindowMaker(locusShard, genomeLocParser, readSource,
            locusShard.getGenomeLocs(), new SampleDataSource());
    WindowMaker.WindowMakerIterator windowIterator = maker.next();

    LocusShardDataProvider provider = new LocusShardDataProvider(locusShard,
            windowIterator.getSourceInfo(), genomeLocParser, windowIterator.getLocus(),
            windowIterator, null, null);
    LocusView locusView = createView(provider);

    // The read must still be visible in context at the last overlapping base.
    testReadsInContext(locusView, locusShard.getGenomeLocs(),
            Collections.singletonList(overlappingRead));
}
@Test
public void readOverlappingStartTest() {
SAMRecord read = buildSAMRecord("chr1", 1, 10);

View File

@ -0,0 +1,225 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 9/28/11
* Time: 9:54 PM
* To change this template use File | Settings | File Templates.
*/
/**
 * Unit tests for {@link ReadClipper} hard-clipping operations on a small
 * artificial 4-base read.
 */
public class ReadClipperUnitTest extends BaseTest {

    // TODO: Add error messages on failed tests

    // NOTE(review): all tests share the single read/clipper built in init() and rely
    // on the clipping methods not mutating the underlying read. testHardClipLowQualEnds
    // temporarily rewrites the quality string and restores it at the end -- if an
    // assertion fails before the restore, later tests see stale qualities. Consider
    // @BeforeMethod setup instead -- TODO confirm with test owner.
    SAMRecord read, expected;
    ReadClipper readClipper;

    // 4-base read with strictly increasing base qualities, so each quality threshold
    // clips a predictable prefix (or suffix, once the quals are reversed).
    final static String BASES = "ACTG";
    final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63

    @BeforeClass
    public void init() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
        read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
        read.setReadUnmappedFlag(true);
        // FIX: dropped redundant "new String(...)" copies -- BASES/QUALS are already Strings.
        read.setReadBases(BASES.getBytes());
        read.setBaseQualityString(QUALS);
        readClipper = new ReadClipper(read);
    }

    @Test
    public void testHardClipBothEndsByReferenceCoordinates() {
        logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader()));

        //clip 1 base at each end
        expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
        Assert.assertEquals(expected.getCigarString(), "1H2M1H");
    }

    @Test
    public void testHardClipByReadCoordinates() {
        logger.warn("Executing testHardClipByReadCoordinates");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader()));

        //clip 1 base at start
        expected = readClipper.hardClipByReadCoordinates(0,0);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
        Assert.assertEquals(expected.getCigarString(), "1H3M");

        //clip 1 base at end
        expected = readClipper.hardClipByReadCoordinates(3,3);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
        Assert.assertEquals(expected.getCigarString(), "3M1H");

        //clip 2 bases at start
        expected = readClipper.hardClipByReadCoordinates(0,1);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
        Assert.assertEquals(expected.getCigarString(), "2H2M");

        //clip 2 bases at end
        expected = readClipper.hardClipByReadCoordinates(2,3);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
        Assert.assertEquals(expected.getCigarString(), "2M2H");
    }

    @Test
    public void testHardClipByReferenceCoordinates() {
        logger.warn("Executing testHardClipByReferenceCoordinates");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader()));

        //clip 1 base at start
        expected = readClipper.hardClipByReferenceCoordinates(-1,1);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
        Assert.assertEquals(expected.getCigarString(), "1H3M");

        //clip 1 base at end
        expected = readClipper.hardClipByReferenceCoordinates(3,-1);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
        Assert.assertEquals(expected.getCigarString(), "3M1H");

        //clip 2 bases at start
        expected = readClipper.hardClipByReferenceCoordinates(-1,2);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
        Assert.assertEquals(expected.getCigarString(), "2H2M");

        //clip 2 bases at end
        expected = readClipper.hardClipByReferenceCoordinates(2,-1);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
        Assert.assertEquals(expected.getCigarString(), "2M2H");
    }

    @Test
    public void testHardClipByReferenceCoordinatesLeftTail() {
        logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader()));

        //clip 1 base at start
        expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
        Assert.assertEquals(expected.getCigarString(), "1H3M");

        //clip 2 bases at start
        expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
        Assert.assertEquals(expected.getCigarString(), "2H2M");
    }

    @Test
    public void testHardClipByReferenceCoordinatesRightTail() {
        logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader()));

        //clip 1 base at end
        expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
        Assert.assertEquals(expected.getCigarString(), "3M1H");

        //clip 2 bases at end
        expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
        Assert.assertEquals(expected.getCigarString(), "2M2H");
    }

    @Test
    public void testHardClipLowQualEnds() {
        // FIX: log message previously said "testHardClipByReferenceCoordinates"
        // (copy-paste error); it now names this test.
        logger.warn("Executing testHardClipLowQualEnds");

        //Clip whole read
        Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader()));

        //clip 1 base at start
        expected = readClipper.hardClipLowQualEnds((byte)34);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
        Assert.assertEquals(expected.getCigarString(), "1H3M");

        //clip 2 bases at start
        expected = readClipper.hardClipLowQualEnds((byte)44);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
        Assert.assertEquals(expected.getCigarString(), "2H2M");

        // Reverse Quals sequence so the low-quality bases are at the read's end
        readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33

        //clip 1 base at end
        expected = readClipper.hardClipLowQualEnds((byte)34);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
        Assert.assertEquals(expected.getCigarString(), "3M1H");

        //clip 2 bases at end
        expected = readClipper.hardClipLowQualEnds((byte)44);
        Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
        Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
        Assert.assertEquals(expected.getCigarString(), "2M2H");

        // revert Qual sequence so later tests see the original qualities
        readClipper.getRead().setBaseQualityString(QUALS);
    }
}