deleting accidentally committed junk
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4464 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b3d81984aa
commit
39e373af6e
|
|
@ -88,7 +88,7 @@ public class VariantContextAdaptors {
|
|||
return null;
|
||||
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
|
||||
|
||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || DbSNPHelper.isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
|
|
|||
|
|
@ -45,6 +45,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
|
@ -84,6 +87,17 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
// @Argument(fullName="vcf_format", shortName="vcf", doc="generate output file in VCF format", required=false)
|
||||
// boolean FORMAT_VCF = false;
|
||||
|
||||
@Argument(fullName = "genotype_intervals", shortName = "genotype",
|
||||
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
|
||||
public String genotypeIntervalsFile = null;
|
||||
|
||||
@Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
|
||||
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
|
||||
"if the list turns out to be unsorted, it will throw an exception. "+
|
||||
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
|
||||
"to sort and keep it in memory (increases memory usage!).")
|
||||
protected boolean GENOTYPE_NOT_SORTED = false;
|
||||
|
||||
@Argument(fullName="somatic", shortName="somatic",
|
||||
doc="Perform somatic calls; two input alignment files (-I option) must be specified. Calls are performed from the second file (\"tumor\") against the first one (\"normal\").", required=false)
|
||||
boolean call_somatic = false;
|
||||
|
|
@ -167,6 +181,11 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
private SAMRecord lastRead;
|
||||
private byte[] refBases;
|
||||
private ReferenceDataSource refData;
|
||||
private Iterator<GenomeLoc> genotypeIntervals = null;
|
||||
private GenomeLocSortedSet traverseIntervals = null; // these are the traversal intervals passed with -L option (if any)
|
||||
|
||||
// the current interval in the list of intervals, for which we want to do full genotyping
|
||||
private GenomeLoc currentGenotypeInterval = null;
|
||||
|
||||
// "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
|
||||
|
||||
|
|
@ -272,6 +291,22 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
normalSamples.add(rg.getSample());
|
||||
}
|
||||
}
|
||||
if ( genotypeIntervalsFile != null ) {
|
||||
|
||||
traverseIntervals = getToolkit().getIntervals();
|
||||
|
||||
if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) {
|
||||
// prepare to read intervals one-by-one, as needed (assuming they are sorted).
|
||||
genotypeIntervals = new IntervalFileMergingIterator(
|
||||
new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
|
||||
} else {
|
||||
// read in the whole list of intervals for cleaning
|
||||
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(
|
||||
IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
|
||||
genotypeIntervals = locs.iterator();
|
||||
}
|
||||
currentGenotypeInterval = genotypeIntervals.hasNext() ? genotypeIntervals.next() : null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 3, 2010
|
||||
* Time: 4:10:42 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/** A simple utility class that encapsulates information about a single alignment (offset, strand, overlap, mismatch count).
|
||||
*
|
||||
*/
|
||||
public class AlignmentInfo {
|
||||
private int offset;
|
||||
private int overlap;
|
||||
private int mm ;
|
||||
private Assembly a = null;
|
||||
|
||||
private static int RIDICULOUSLY_LARGE_NUMBER = 1000000000;
|
||||
|
||||
public AlignmentInfo() {
|
||||
offset = 0;
|
||||
mm = RIDICULOUSLY_LARGE_NUMBER;
|
||||
overlap = -1;
|
||||
a = null;
|
||||
}
|
||||
|
||||
public AlignmentInfo(int mm, int offset, boolean isRc, int overlap, Assembly a) {
|
||||
this.offset = (isRc ? (-offset-1) : offset );
|
||||
this.overlap = overlap;
|
||||
this.mm = mm;
|
||||
this.a = a;
|
||||
}
|
||||
|
||||
boolean isAligned() { return mm < RIDICULOUSLY_LARGE_NUMBER; }
|
||||
|
||||
public boolean isNegativeStrand() { return offset < 0; }
|
||||
public Assembly getAssembly() { return a; }
|
||||
public int getOffset() { return ( offset < 0 ? (-offset-1) : offset ); }
|
||||
public int getMismatchCount() { return mm; }
|
||||
public int getOverlap() { return overlap; }
|
||||
public double getMismatchRate() { return isAligned() ? ((double)mm)/overlap : 1.0; }
|
||||
}
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class AlignmentList implements Iterable<AlignmentInfo> {
|
||||
private int best_mm = 1000000000;
|
||||
private int next_best_mm = 1000000000;
|
||||
private List<AlignmentInfo> als = null;
|
||||
private int next_best_count = 0;
|
||||
private int best_overlap = 0;
|
||||
|
||||
private AlignmentStrategy strategy = null;
|
||||
|
||||
public AlignmentList(AlignmentStrategy s) {
|
||||
this.strategy = s;
|
||||
best_mm = 1000000000;
|
||||
next_best_mm = 1000000000;
|
||||
best_overlap = 0;
|
||||
als = new ArrayList<AlignmentInfo>(1);
|
||||
}
|
||||
|
||||
public boolean isAligned() {
|
||||
return best_mm < 1000000000;
|
||||
}
|
||||
|
||||
public List<AlignmentInfo> getAlignments() { return als; }
|
||||
|
||||
public int size() { return als.size(); }
|
||||
|
||||
public Iterator<AlignmentInfo> iterator() { return als.iterator(); }
|
||||
|
||||
// public void tryAdd(int mm, int offset, boolean isRc, int overlap) {
|
||||
// tryAdd(new AlignmentInfo(mm,offset,isRc,overlap));
|
||||
// }
|
||||
|
||||
public void tryAdd(AlignmentInfo ai) {
|
||||
AlignmentStrategy.Action a = strategy.action(ai,this) ;
|
||||
switch ( a ) {
|
||||
case DISCARD: break;
|
||||
case REPLACE_BEST:
|
||||
next_best_mm = best_mm;
|
||||
next_best_count = size();
|
||||
als.clear();
|
||||
als.add(ai);
|
||||
best_mm = ai.getMismatchCount();
|
||||
best_overlap = ai.getOverlap();
|
||||
break;
|
||||
case ADD_BEST:
|
||||
als.add(ai);
|
||||
if ( ai.getMismatchCount() < best_mm ) best_mm = ai.getMismatchCount();
|
||||
if ( ai.getOverlap() > best_overlap) best_overlap = ai.getOverlap();
|
||||
break;
|
||||
case REPLACE_NEXTBEST:
|
||||
next_best_mm = ai.getMismatchCount();
|
||||
next_best_count = 1;
|
||||
break;
|
||||
case ADD_NEXTBEST:
|
||||
next_best_count++;
|
||||
if ( ai.getMismatchCount() < next_best_mm ) next_best_mm = ai.getMismatchCount();
|
||||
break;
|
||||
default: throw new StingException("Unrecognized action requested: "+a);
|
||||
}
|
||||
}
|
||||
|
||||
public void tryAddAll(AlignmentList al) {
|
||||
for( AlignmentInfo ai : al) {
|
||||
tryAdd(ai);
|
||||
}
|
||||
}
|
||||
|
||||
public int getBestMMCount() { return best_mm; }
|
||||
public int getBestOverlap() { return best_overlap; }
|
||||
public int getBestHitCount() { return als.size() ; }
|
||||
public int getNextBestHitCount() { return next_best_count; }
|
||||
public int getNextBestMMCount() { return next_best_mm; }
|
||||
// public int getOverlap() { return overlap; }
|
||||
// public int getOffset() { return offset; }
|
||||
// public boolean isNegativeStrand() { return rc; }
|
||||
|
||||
// public double getMismatchRate() { return isAligned() ? ((double)best_mm)/overlap : 1.0 ; }
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 3, 2010
|
||||
* Time: 2:53:43 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public interface AlignmentStrategy {
|
||||
enum Action {
|
||||
DISCARD,
|
||||
REPLACE_BEST,
|
||||
ADD_BEST,
|
||||
REPLACE_NEXTBEST,
|
||||
ADD_NEXTBEST
|
||||
};
|
||||
|
||||
public Action action(AlignmentInfo alignment, AlignmentList currentList);
|
||||
}
|
||||
|
|
@ -0,0 +1,442 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 3, 2010
|
||||
* Time: 2:20:22 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class Assembly {
|
||||
private byte[] consensus;
|
||||
private short[] coverage;
|
||||
private short[] mismatches;
|
||||
private short [][] base_counts;
|
||||
|
||||
private boolean debug = false;
|
||||
private List<String> seq_ids;
|
||||
private List<byte []> seqs;
|
||||
private List<Integer> seq_offsets;
|
||||
|
||||
private KmerIndex lookup; // assembled consensus sequence is indexed here
|
||||
|
||||
private int hookedAt = -1; // if set, specifies start on the ref of the assembled consensus sequence
|
||||
|
||||
private static List<PrimitivePair.Int> EMPTY_KMER_LIST = new ArrayList<PrimitivePair.Int>(0);
|
||||
|
||||
private int K = 15;
|
||||
|
||||
private AlignmentStrategy strategy = null;
|
||||
|
||||
/** Creates new assembly seeded with the specified sequence; default key length (15) is used.
|
||||
*
|
||||
* @param seq
|
||||
* @param id
|
||||
*/
|
||||
public Assembly(final byte[] seq, String id) {
|
||||
this(15,seq,id);
|
||||
}
|
||||
|
||||
/** Creates new assembly seeded with the specified sequence and sets kmer (key) length K for the internally maintained
|
||||
* lookup index tables.
|
||||
* @param K
|
||||
* @param seq
|
||||
* @param id
|
||||
*/
|
||||
public Assembly(int K, final byte[] seq, String id) {
|
||||
this.K = K;
|
||||
seq_ids = new ArrayList<String>();
|
||||
seq_offsets = new ArrayList<Integer>();
|
||||
seqs = new ArrayList<byte[]>();
|
||||
seq_ids.add(id);
|
||||
seq_offsets.add(0);
|
||||
seqs.add(seq);
|
||||
consensus = Arrays.copyOf(seq,seq.length);
|
||||
coverage = new short[seq.length];
|
||||
Arrays.fill(coverage,(short)1);
|
||||
mismatches = new short[seq.length]; // filled with 0's
|
||||
base_counts = new short[4][seq.length];
|
||||
for ( int i = 0 ; i < seq.length ; i++ ) {
|
||||
int j = BaseUtils.simpleBaseToBaseIndex(seq[i]);
|
||||
if ( j != -1) base_counts[j][i] = 1;
|
||||
}
|
||||
lookup = new KmerIndex(K,seq);
|
||||
strategy = new DefaultAlignmentStrategy();
|
||||
}
|
||||
|
||||
/** Creates new assembly seeded with the specified sequence; default key length (15) is used and the position on the
|
||||
* reference of the entire assembly is set (as assemblly grows, position on the ref will be updated properly).
|
||||
*
|
||||
* @param seq
|
||||
* @param id
|
||||
*/
|
||||
public Assembly(final byte[] seq, String id, int posOnRef) {
|
||||
this(seq,id);
|
||||
hookedAt = posOnRef;
|
||||
}
|
||||
|
||||
/** Creates new assembly seeded the specified sequence and sets kmer (key) length K for the internally maintained
|
||||
* lookup index tables. Parameter <code>posOnRef</code> specifies the (initial) position of the entire assembly on the
|
||||
* ref; as the assembly grows, the position on ref will be updated properly.
|
||||
* @param K
|
||||
* @param seq
|
||||
* @param id
|
||||
*/
|
||||
public Assembly(int K, final byte[] seq, String id, int posOnRef) {
|
||||
this(K,seq,id);
|
||||
hookedAt = posOnRef;
|
||||
}
|
||||
|
||||
/** Returns total number of sequences currently held by this assembly.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumSeqs() { return seqs.size() ; }
|
||||
|
||||
/** Attempts to align <code>seq</code> to this assembly's consensus. Does NOT add
|
||||
* the sequence to the consensus even if it aligns! This methods returns a list of alternative
|
||||
* best alignments found (according to the strategy used) in a newly allocated AlignmentList object.
|
||||
* @param seq sequence to align to this consensus
|
||||
* @param tryRC if true, will try aligning both seq and its reverse complement; otherwise
|
||||
* only forward alignment will be attempted (i.e. best placement of the seq, as it is provided,
|
||||
* along the assembled consensus sequence)
|
||||
* @return a newly allocated alignment list; returned list can be empty if no alignments are found
|
||||
*/
|
||||
public AlignmentList align(final byte[] seq, boolean tryRC) {
|
||||
return align(seq,tryRC,null);
|
||||
}
|
||||
|
||||
/** Attempts to align <code>seq</code> to this assembly's consensus. Does NOT add
|
||||
* the sequence to the consensus even if it aligns! This method uses existing list of alignments
|
||||
* (which can contain alignments to a different assembly) and updates it as necessary if a better alignment
|
||||
* (or multiple better alignments) than the one(s) already held in the list is found. Reference to the
|
||||
* <i>same</i> alignment list object is returned: this method modifies it's argument. If alignment list argument
|
||||
* is <code>null</code>, new alignment list object will be allocated and returned by this method.
|
||||
*
|
||||
* @param seq sequence to align to this consensus
|
||||
* @param tryRC if true, will try aligning both seq and its reverse complement; otherwise
|
||||
* only forward alignment will be attempted (i.e. best placement of the seq, as it is provided,
|
||||
* along the assembled consensus sequence)
|
||||
* @return a newly allocated alignment list; returned list can be empty if no alignments are found
|
||||
*/
|
||||
public AlignmentList align(final byte[] seq, boolean tryRC, AlignmentList a) {
|
||||
if ( debug ) System.out.println("Assembly:: aligning sequence of length "+seq.length+"; tryRC="+tryRC+"; K="+K);
|
||||
|
||||
List<PrimitivePair.Int> fw_kmers = KmerIndex.toKeyOffsetList(K,seq);
|
||||
|
||||
if ( debug ) {
|
||||
for( PrimitivePair.Int kmer: fw_kmers) {
|
||||
System.out.println("id="+kmer.getFirst()+" seq="+new String(KmerIndex.idToSeq(K,kmer.getFirst()))+" offset on seq="+kmer.getSecond());
|
||||
}
|
||||
}
|
||||
|
||||
byte [] rc_seq = (tryRC ? BaseUtils.simpleReverseComplement(seq) : null );
|
||||
List<PrimitivePair.Int> rc_kmers = (tryRC ? KmerIndex.toKeyOffsetList(K,rc_seq) : EMPTY_KMER_LIST );
|
||||
|
||||
if ( a == null ) a = new AlignmentList(strategy);
|
||||
|
||||
// i is the position on the sequence seq or on its reverse complement
|
||||
for(PrimitivePair.Int kmer : fw_kmers ) {
|
||||
|
||||
List<Integer> offsets = lookup.getOffsets(kmer.first);
|
||||
if ( offsets != null ) {
|
||||
// kmer present in consensus sequence
|
||||
for ( int s : offsets ) { // s=offset of the current kmer on the assembled consensus
|
||||
int trial_offset = s - kmer.second; // offset of the seq on the assembled consensus suggested by current kmer/offset
|
||||
int trial_mm = countMismatches(seq,trial_offset,a.getNextBestMMCount());
|
||||
a.tryAdd(new AlignmentInfo(trial_mm,trial_offset,false,overlap(trial_offset,seq.length),this));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ( PrimitivePair.Int kmer : rc_kmers ) {
|
||||
|
||||
List<Integer> offsets = lookup.getOffsets(kmer.first);
|
||||
if ( offsets != null ) {
|
||||
// kmer present in consensus sequence
|
||||
for ( int s : offsets ) {
|
||||
int trial_offset = s - kmer.second;
|
||||
int trial_mm = countMismatches(rc_seq,trial_offset,a.getNextBestMMCount());
|
||||
a.tryAdd(new AlignmentInfo(trial_mm,trial_offset,true,overlap(trial_offset,seq.length),this));
|
||||
}
|
||||
}
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
public void setDebug(boolean d) { this.debug = d; lookup.setDebug(d);}
|
||||
|
||||
public int numSequences() { return seq_ids.size(); }
|
||||
|
||||
private int overlap(int offset, int seq_length ) {
|
||||
return Math.min(consensus.length,offset+seq_length)-Math.max(0,offset);
|
||||
}
|
||||
|
||||
private int countMismatches(final byte seq[], int offset, int cutoff) {
|
||||
int mm = 0;
|
||||
|
||||
int i ;
|
||||
if ( offset >= 0 ) i = 0;
|
||||
else { i = (-offset); offset = 0; }
|
||||
for ( ; i < seq.length && offset < consensus.length ; i++ , offset++ ) {
|
||||
if ( seq[i] != consensus[offset] ) {
|
||||
mm++;
|
||||
if ( mm > cutoff ) break;
|
||||
}
|
||||
}
|
||||
|
||||
return mm;
|
||||
}
|
||||
|
||||
public byte[] getConsensus() { return consensus; }
|
||||
|
||||
public int getPosOnRef() { return hookedAt; }
|
||||
|
||||
public int getConsensusLength() { return consensus.length; }
|
||||
|
||||
public List<Integer> getOffsets() { return seq_offsets; }
|
||||
public int getOffset(int i) { return seq_offsets.get(i); }
|
||||
|
||||
public List<String> getSeqIds() { return Collections.unmodifiableList(seq_ids); }
|
||||
|
||||
/** Adds specified sequence to this assembly according to the provided
|
||||
* alignment information. Will properly update consensus sequence of this assembly
|
||||
* and all associated information (mismatches, base counts etc)
|
||||
* @param seq
|
||||
* @param id
|
||||
* @param a
|
||||
*/
|
||||
public void add(final byte[] seq, String id, AlignmentInfo a) {
|
||||
|
||||
if ( ! a.isAligned() ) throw new StingException("Can not add sequence to the assembly: provided alignment is empty");
|
||||
|
||||
seq_ids.add(id);
|
||||
|
||||
int offset = a.getOffset();
|
||||
int oldConsensusLength = consensus.length;
|
||||
|
||||
byte [] seq_to_add = ( a.isNegativeStrand() ? BaseUtils.simpleReverseComplement(seq) : seq);
|
||||
|
||||
seqs.add(seq_to_add);
|
||||
|
||||
int pos_on_seq = 0;
|
||||
int pos_on_cons = 0;
|
||||
|
||||
int leftExtension = 0; // how many bases we added to the consensus on the left
|
||||
int rightExtension = 0; // how many bases we added to the consensus on the right
|
||||
|
||||
if ( offset < 0 ) {
|
||||
// if sequence sticks out to the left of the current consensus:
|
||||
|
||||
leftExtension = -offset;
|
||||
for(int i = 0 ; i < seq_offsets.size() ; i++ ) {
|
||||
// we are going to extend consensus to the left, so we need to update all current offsets:
|
||||
seq_offsets.set(i,seq_offsets.get(i)+leftExtension);
|
||||
}
|
||||
|
||||
if ( hookedAt > 0 ) hookedAt -= leftExtension;
|
||||
// extend consensus and associated arrays to the left :
|
||||
|
||||
consensus = Utils.extend(consensus,offset,(byte)0); // remember, offset is negative here, extending to the left
|
||||
coverage = Utils.extend(coverage,offset,(short)1) ;
|
||||
mismatches = Utils.extend(mismatches,offset,(short)0);
|
||||
for ( int i = 0 ; i < 4 ; i++ ) base_counts[i] = Utils.extend(base_counts[i],offset,(short)0);
|
||||
|
||||
for ( int j = 0 ; j < -offset ; j++ ) {
|
||||
consensus[j] = seq_to_add[j];
|
||||
int b = BaseUtils.simpleBaseToBaseIndex(seq_to_add[j]);
|
||||
if ( b != -1 ) base_counts[b][j]=1;
|
||||
}
|
||||
|
||||
pos_on_seq = pos_on_cons = -offset;
|
||||
|
||||
offset = 0;
|
||||
}
|
||||
if ( offset > 0 ) pos_on_cons = offset;
|
||||
|
||||
seq_offsets.add(offset);
|
||||
|
||||
boolean consensus_changed = false;
|
||||
|
||||
for ( ; pos_on_seq < seq_to_add.length && pos_on_cons < consensus.length ; pos_on_seq++, pos_on_cons++ ) {
|
||||
coverage[pos_on_cons]++;
|
||||
final byte base = seq_to_add[pos_on_seq];
|
||||
final int b = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( b != -1 ) {
|
||||
// if base on seq is not a regular base, there is nothing to do;
|
||||
// otherwise count mismatches and optionally update consensus if current base tips the balance
|
||||
base_counts[b][pos_on_cons]++;
|
||||
int maxcount = 0;
|
||||
int maxb = -1;
|
||||
for ( int j = 0 ; j < 4 ; j++ ) {
|
||||
if ( base_counts[j][pos_on_cons] > maxcount ) {
|
||||
maxcount = base_counts[j][pos_on_cons];
|
||||
maxb = j;
|
||||
}
|
||||
}
|
||||
// we are guaranteed here that maxb != -1 since we just added one regular base (the current one)
|
||||
// few lines above...
|
||||
byte newbase = BaseUtils.baseIndexToSimpleBase(maxb);
|
||||
if ( newbase != consensus[pos_on_cons] ) { // need to change the consensus base (will recompute mismatches)
|
||||
consensus[pos_on_cons] = newbase;
|
||||
consensus_changed = true;
|
||||
mismatches[pos_on_cons] = 0;
|
||||
for ( int i = 0 ; i < 4 ; i++ ) {
|
||||
if ( i == maxb ) continue;
|
||||
mismatches[pos_on_cons] += base_counts[i][pos_on_cons];
|
||||
}
|
||||
} else { // consensus base did not change; just increment mismatches if current sequence's base differs from consensus
|
||||
if ( base != consensus[pos_on_cons]) mismatches[pos_on_cons]++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Last step: if sequence sticks out of current consensus on the right, we need to extend the latter:
|
||||
|
||||
if ( pos_on_seq < seq_to_add.length ) {
|
||||
// sequence sticks out of consensus to the right
|
||||
rightExtension = seq_to_add.length - pos_on_seq;
|
||||
consensus = Utils.extend(consensus,rightExtension,(byte)0);
|
||||
coverage = Utils.extend(coverage,rightExtension,(short)1);
|
||||
mismatches = Utils.extend(mismatches,rightExtension,(short)0);
|
||||
for ( int i = 0 ; i < 4 ; i++ ) base_counts[i] = Utils.extend(base_counts[i],rightExtension,(short)0);
|
||||
for ( ; pos_on_seq < seq_to_add.length ; pos_on_seq++, pos_on_cons++ ) {
|
||||
byte base = seq_to_add[pos_on_seq];
|
||||
consensus[pos_on_cons] = base;
|
||||
int b = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( b != -1 ) base_counts[b][pos_on_cons] = base;
|
||||
}
|
||||
}
|
||||
|
||||
// finally, the new sequence we just added could have mismatches that tip some consensus bases into new values;
|
||||
// let's catch those cases:
|
||||
|
||||
|
||||
for ( int i = 0 ; i < consensus.length ; i++ ) {
|
||||
byte cons_base = consensus[i];
|
||||
int b = BaseUtils.simpleBaseToBaseIndex(cons_base);
|
||||
}
|
||||
|
||||
// there is probably a better way, but for now we just recompute the whole lookup table when consensus
|
||||
// changes somewhere in the middle (if we want to be samrt we need to identify just the kmers that changed
|
||||
// and find/change them in the lookup table).
|
||||
if ( consensus_changed ) {
|
||||
lookup.clear();
|
||||
lookup.index(consensus);
|
||||
} else {
|
||||
if ( leftExtension > 0 || rightExtension > 0 ) lookup.updateIndex(consensus,leftExtension,oldConsensusLength);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
public String toAlignmentString(boolean mismatchesOnly, boolean printNames) {
|
||||
|
||||
int maxNameLength = 0;
|
||||
int spacing=3;
|
||||
|
||||
if ( printNames ) {
|
||||
for ( String n : seq_ids ) if ( n.length() > maxNameLength ) maxNameLength++;
|
||||
}
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
if ( printNames ) b.append(Utils.dupString(' ',maxNameLength+spacing));
|
||||
b.append(new String(consensus));
|
||||
b.append('\n');
|
||||
|
||||
for ( int j = 0; j < seqs.size() ; j++ ) {
|
||||
int offset = seq_offsets.get(j);
|
||||
byte [] seq = seqs.get(j);
|
||||
|
||||
if ( printNames ) {
|
||||
b.append(seq_ids.get(j));
|
||||
b.append(Utils.dupString(' ',maxNameLength-seq_ids.get(j).length()+spacing));
|
||||
}
|
||||
|
||||
for ( int i = 0 ; i < offset ; i++ ) b.append(' ');
|
||||
|
||||
for ( int i = 0 ; i < seq.length ; i++ ) {
|
||||
|
||||
byte base = seq[i];
|
||||
if ( mismatchesOnly && base == consensus[i+offset] ) {
|
||||
b.append('.');
|
||||
} else b.append((char)base);
|
||||
}
|
||||
b.append('\n');
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
|
||||
public static void testMe(String [] argv ) {
|
||||
byte [] seq1 = "ACGTTGCGTGGTTCACTGCAGTAACTGACTGATGCA".getBytes();
|
||||
byte [] seq2 = "GCGTGGTTTACTGCAGTAACTGACTGATGCAACGTGTTTG".getBytes();
|
||||
byte [] seq3 = "GGNTGACGTTGCGTGGTTTACTGCAGTAACTGACT".getBytes();
|
||||
byte [] seq4 = "NNNTTNCGTGGTTTACTGCAGTAACTGACTGATGCA".getBytes();
|
||||
|
||||
Assembly a = new Assembly(seq1,"1");
|
||||
|
||||
AlignmentList al = a.align(seq2,false);
|
||||
if ( al.isAligned() ) System.out.println("seq 2 aligned");
|
||||
else System.out.println("seq 2 did NOT align");
|
||||
|
||||
if ( al.size() == 1 ) a.add(seq2,"2",al.getAlignments().get(0));
|
||||
else System.out.println("Multiple alignments found for seq 2");
|
||||
|
||||
al = a.align(seq3,false);
|
||||
if ( al.isAligned() ) System.out.println("seq 3 aligned");
|
||||
else System.out.println("seq 3 did NOT align");
|
||||
|
||||
if ( al.size() == 1 ) a.add(seq3,"3",al.getAlignments().get(0));
|
||||
else System.out.println("Multiple alignments found for seq 3");
|
||||
|
||||
al = a.align(seq4,false);
|
||||
if ( al.isAligned() ) System.out.println("seq 4 aligned");
|
||||
else System.out.println("seq 4 did NOT align");
|
||||
|
||||
if ( al.size() == 1 ) a.add(seq4,"4",al.getAlignments().get(0));
|
||||
else System.out.println("Multiple alignments found for seq 4");
|
||||
|
||||
System.out.println(a.toAlignmentString(true, true));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 13, 2010
|
||||
* Time: 5:53:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AssemblyGraph {
|
||||
|
||||
private List<Assembly> sources;
|
||||
|
||||
public AssemblyGraph(Assembly a) {
|
||||
sources = new LinkedList<Assembly>();
|
||||
sources.add(a);
|
||||
}
|
||||
|
||||
/** Initializes assembly from the single specified read, and sets this assembly as the root of this
|
||||
* assembly graph
|
||||
* @param r read; must be aligned, otherwise exception will be thrown
|
||||
* @param K index (Kmer) size of the assembly that will be initialized with the read r
|
||||
*/
|
||||
public AssemblyGraph(SAMRecord r, int K) {
|
||||
if (AlignmentUtils.isReadUnmapped(r))
|
||||
throw new StingException("Can not initialize assembly graph with unaligned read");
|
||||
sources = new LinkedList<Assembly>();
|
||||
sources.add( new Assembly(K,r.getReadBases(),r.getReadName(), r.getAlignmentStart()) );
|
||||
}
|
||||
|
||||
public void add(SAMRecord r) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 3, 2010
|
||||
* Time: 4:53:01 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class DefaultAlignmentStrategy implements AlignmentStrategy {
|
||||
|
||||
public Action action(AlignmentInfo alignment, AlignmentList currentList) {
|
||||
if ( currentList.size() == 0 ) return Action.REPLACE_BEST;
|
||||
|
||||
if ( alignment.getMismatchCount() > currentList.getNextBestMMCount() ) return Action.DISCARD;
|
||||
|
||||
if ( alignment.getMismatchCount() < currentList.getBestMMCount() ) return Action.REPLACE_BEST;
|
||||
if ( alignment.getMismatchCount() == currentList.getBestMMCount() ) return Action.ADD_BEST;
|
||||
if ( alignment.getMismatchCount() < currentList.getNextBestMMCount() ) return Action.REPLACE_NEXTBEST;
|
||||
if ( alignment.getMismatchCount() == currentList.getNextBestMMCount() ) return Action.ADD_NEXTBEST;
|
||||
|
||||
throw new StingException("Unexpected case found and left unprocessed");
|
||||
// return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 3, 2010
|
||||
* Time: 1:31:15 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class KmerIndex {
|
||||
private HashMap<Integer,List<Integer> > lookup;
|
||||
private int K = -1;
|
||||
private int mask = 0;
|
||||
private boolean debug = false;
|
||||
|
||||
/**
|
||||
* Translates sequence <code>seq</code> into the list of all valid kmers paired
|
||||
* with their offsets on that sequence. Valid kmer is a kmer that contains only ACGT bases; if <code>seq</code>
|
||||
* contains any other symbols, no kmers overlapping with such symbols will be generated. This method
|
||||
* returns a linear (possibly gapped if non-ACGT symbols are present) representation of the sequence as its kmers
|
||||
* with corresponding offsets, NOT a lookup index. If a specific kmer occurs on the sequence N times,
|
||||
* the returned list will have N occurences of this kmer, each paired with one unique location on the sequence.
|
||||
* Kmers themselves are represented as integer kmer ids here, see #idToSeq() if string (ACGT bases) representation
|
||||
* of kmers is needed. Empty list if returned if no valid kmers are found on the sequence (i.e. too many non-ACGT bases)
|
||||
*
|
||||
* @param K key (kmer) length
|
||||
* @param seq sequence to translate into kmer/offset representation
|
||||
* @return list of kmer/offsets
|
||||
*/
|
||||
public static List<PrimitivePair.Int> toKeyOffsetList(int K, byte seq[]) {
|
||||
return toKeyOffsetList(K,seq,0,seq.length);
|
||||
}
|
||||
|
||||
/** Same as #toKeyOffsetList(int K, byte [] seq) (see docs), except that this method is not static and
|
||||
* uses key length K associated with the specific instance of the KmerIndex class.
|
||||
* @param seq
|
||||
* @return
|
||||
*/
|
||||
public List<PrimitivePair.Int> toKeyOffsetList(byte [] seq) {
|
||||
return toKeyOffsetList(this.K,seq);
|
||||
}
|
||||
|
||||
/** Returns an ordered sequence of overlapping (1-shift in the ideal case) k-mers of length K found in the subsequence of
|
||||
* length <code>length</code> of sequence <code>seq</code> starting at position <code>start</code>. All returned kmers
|
||||
* are fully subsumed by the interval [start, start+length) on the sequence <code>seq</code> (no partial overlaps).
|
||||
* Each kmer is paired with its offset on the (full length) <code>seq</code> in the returned list.
|
||||
* Note that only k-mers on the forward strand are returned. You need to manually rc the string and
|
||||
* call toKeyOffsetList() again to get rc k-mers. If sequence contains any other symbols than ACGT, all k-mers
|
||||
* that would overlap with those symbols will be skipped (not present in the returned list). See also
|
||||
* #toKeyOffsetList(int K, byte [] seq) which translates the whole sequence <code>seq</code>.
|
||||
*
|
||||
* @param K index key (k-mer) length
|
||||
* @param seq sequence to compute k-mers from
|
||||
* @param start compute kmers for subsequence [start, start+length) of <code>seq</code>
|
||||
* @param length compute kmers for subsequence [start, start+length) of <code>seq</code>
|
||||
* @return a list of pairs (kmer,offset_on_the_seq) for each valid kmer (i.e. kmer that does not overlap with
|
||||
* non-ACGT bases); if no valid kmers exist, the returned list will be empty.
|
||||
*/
|
||||
public static List<PrimitivePair.Int> toKeyOffsetList(int K, byte[] seq, int start, int length) {
|
||||
|
||||
if ( length < K ) throw new StingException("Can not index sequence that is shorter than key length: total seq="+seq.length+"; start="+start+"; length="+length);
|
||||
|
||||
int mask = 0;
|
||||
if ( length > K ) {
|
||||
for ( int i = 0; i < K ; i++ ) {
|
||||
mask <<= 2;
|
||||
mask |= 0x03;
|
||||
}
|
||||
}
|
||||
|
||||
int key = 0;
|
||||
int i ;
|
||||
final int final_pos = start+length; // first base *after* the last position we want to index
|
||||
|
||||
ArrayList<PrimitivePair.Int> l = new ArrayList<PrimitivePair.Int>(length-K+1);
|
||||
|
||||
PrimitivePair.Int firstK = toFirstKey(K,seq,start,final_pos);
|
||||
if ( firstK == null ) {
|
||||
// ooops, too many non-ACGT bases, we were not able to find a single valid k-mer on the whole sequence!
|
||||
return l;
|
||||
}
|
||||
|
||||
l.add(firstK);
|
||||
|
||||
start = firstK.getSecond();
|
||||
i = start + K; // i points to the first base after the returned kmer firstK
|
||||
key = firstK.getFirst();
|
||||
|
||||
// now let's try recomputing next kmers in an efficient way: we reuse previous kmer and add only the new last base.
|
||||
// This will break if we encounter a non-ACGT base, in which case we will have to start over.
|
||||
|
||||
for ( start++ ; i < final_pos ; i++, start++ ) {
|
||||
int d = BaseUtils.simpleBaseToBaseIndex(seq[i]);
|
||||
if ( d == -1 ) {
|
||||
// ooops, we ran into a bad base; let's jump over it completely and reinitialize the key
|
||||
// (since all kmers overlapping with the current base are invalid)
|
||||
firstK = toFirstKey(K,seq,i+1,final_pos);
|
||||
if ( firstK == null ) break; // no more valid kmers
|
||||
l.add(firstK);
|
||||
start = firstK.getSecond();
|
||||
i = start+K; // points to the base right after the Kmer we just found
|
||||
key = firstK.getFirst(); // reset key to the new kmer we just found
|
||||
} else {
|
||||
// the base is good, so we can compute our new kmer very efficiently using the old one:
|
||||
key <<= 2;
|
||||
key &= mask;
|
||||
key += d;
|
||||
l.add(new PrimitivePair.Int(key,start));
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
/** Non-static version of #toKeyOffsetList(int K, byte [] seq, int start, int length) (see docs), which
|
||||
* uses key length K associated with this instance of the KmerIndex object.
|
||||
* @param seq
|
||||
* @param start
|
||||
* @param length
|
||||
* @return
|
||||
*/
|
||||
public List<PrimitivePair.Int> toKeyOffsetList(byte[] seq, int start, int length) {
|
||||
return toKeyOffsetList(this.K,seq,start,length);
|
||||
}
|
||||
|
||||
|
||||
/** Computes index (key) of the first valid kmer in the interval [start,stop) of the sequence seq. Kmer is valid
|
||||
* if it contains only valid (ACGT) bases. Returns key and actual offset of first such kmer found, or null
|
||||
* if such kmer does not exist (i.e. if seq does not contain a continuous span of ACGT bases at least K bases long).
|
||||
* @param K
|
||||
* @param seq
|
||||
* @param start
|
||||
* @param stop
|
||||
* @return
|
||||
*/
|
||||
private static PrimitivePair.Int toFirstKey(int K, byte[] seq, int start, int stop) {
|
||||
int d = -1;
|
||||
int key = 0 ;
|
||||
while ( d == -1 && start < stop - K + 1) {
|
||||
key = 0;
|
||||
for ( int i = start ; i < start+K; i++ ) {
|
||||
key <<= 2;
|
||||
d = BaseUtils.simpleBaseToBaseIndex(seq[i]);
|
||||
if ( d == -1) {
|
||||
// ooops, non-ACGT base found, abort and start over. Next kmer that
|
||||
// have a chance to be valid (contain only ACGT bases) can start only after the current position:
|
||||
start = i+1;
|
||||
break;
|
||||
}
|
||||
key += d;
|
||||
}
|
||||
} // got the first key
|
||||
|
||||
if ( d != -1 ) return new PrimitivePair.Int(key,start);
|
||||
else return null;
|
||||
}
|
||||
|
||||
/** Creates an empty kmer index table with specified key length
|
||||
*
|
||||
* @param K
|
||||
*/
|
||||
public KmerIndex(final int K) {
|
||||
if ( K > 16 ) throw new StingException("Lookup keys longer than 16 bases are currently not supported");
|
||||
if ( K % 2 == 0 ) throw new StingException("Even keys require additional processing of palindromes, currently not supported. Please use odd key.");
|
||||
this.K = K;
|
||||
|
||||
mask = 0;
|
||||
for ( int i = 0; i < K; i++ ) {
|
||||
mask <<= 2;
|
||||
mask |= 0x03;
|
||||
} // got the first key
|
||||
|
||||
lookup = new HashMap<Integer,List<Integer>>();
|
||||
}
|
||||
|
||||
/** Builds kmer index table with key length <code>K</code> for the sequence <code>seq</code>.
|
||||
*
|
||||
* @param K
|
||||
* @param seq
|
||||
*/
|
||||
public KmerIndex(final int K, final byte[] seq) {
|
||||
this(K);
|
||||
|
||||
if ( seq.length < K ) throw new StingException("Sequence is shorter than requested lookup index key length");
|
||||
|
||||
addToIndex(toKeyOffsetList(K,seq,0,seq.length));
|
||||
}
|
||||
|
||||
public void setDebug(boolean d) { this.debug = d; }
|
||||
|
||||
/** Clears current lookup index table completely (but preserves the key length previously set).
|
||||
*
|
||||
*/
|
||||
public void clear() { lookup.clear(); }
|
||||
|
||||
/** Builds complete index for the sequence seq. This method can be used only when lookup table is
|
||||
* empty (i.e. use #clear() first), otherwise an exception will be thrown.
|
||||
* @param seq
|
||||
*/
|
||||
public void index(final byte[] seq) {
|
||||
if ( ! lookup.isEmpty() ) {
|
||||
throw new StingException("Can not index new sequence: lookup table is already non-empty");
|
||||
}
|
||||
addToIndex(toKeyOffsetList(K,seq,0,seq.length));
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates existing index. It is assumed that the sequence that was already indexed by this KmerIndex object is
|
||||
* the exact subsequence of length <code>old_length</code> of the new sequence <code>seq</code>, starting at
|
||||
* position <code>old_start</code>. No checks are performed, so it is the responsibility of the caller to ensure
|
||||
* that this is indeed the case, otherwise the index will be inconsistent. Since the old sequence is a part
|
||||
* of the new one, this method will keep all the already computed kmers (and update their offsets as needed),
|
||||
* and compute and add kmers/offsets for all the novel bases added to the sequence <code>seq</code> compared
|
||||
* to the old, already indexed subsequnce. If <code>old_length</code> is less than
|
||||
* K (i.e. old sequence could not be and was not indexed at all), the new sequence <code>seq</code> will
|
||||
* be fully indexed from start to end.
|
||||
* @param seq
|
||||
* @param old_start already indexed subsequence starts at this position in <code>seq</code>
|
||||
* @param old_length length of the already indexed subsequence
|
||||
*/
|
||||
public void updateIndex(final byte[] seq, final int old_start, final int old_length) {
|
||||
|
||||
if ( old_length < K ) {
|
||||
if ( ! lookup.isEmpty())
|
||||
throw new StingException("It is claimed that old indexed sequence is shorter than K (i.e. it could not be indexed), but index is non empty");
|
||||
addToIndex( toKeyOffsetList(K,seq,0,seq.length));
|
||||
return;
|
||||
}
|
||||
|
||||
if ( old_start > 0 ) {
|
||||
// update positions of previously indexed k-mers:
|
||||
for ( Map.Entry<Integer,List<Integer>> e : lookup.entrySet() ) {
|
||||
List<Integer> l = e.getValue();
|
||||
for ( int i = 0 ; i < l.size(); i++ ) l.set(i,l.get(i)+old_start);
|
||||
}
|
||||
// take care of additional k-mers appearing *before* the already indexed subsequence:
|
||||
// if already indexed subsequence starts at 'start', the first k-mer from that sequence
|
||||
// ends at start+K-1 (inclusive) and it is obviously already indexed. So the last k-mer we want to index now ends at
|
||||
// start+K-2 (inclusive), the length of [0,start+K-2] interval that we need to index is
|
||||
// start+K-1.
|
||||
addToIndex( toKeyOffsetList(K,seq,0,old_start+K-1) );
|
||||
}
|
||||
|
||||
// the last k-mer we already indexed ends at start+length-1 (inclusive); so it starts at start+length-1-(K-1)=start+length-K.
|
||||
// Hence, the first k-mer that is not indexed yet starts at start+length-K+1. The length of the subsequence that
|
||||
// we need to index, [start+length-K+1,seq.length) is seq.length - start - length +K - 1
|
||||
|
||||
int pos = old_start+old_length-K+1;
|
||||
addToIndex( toKeyOffsetList(K,seq,pos,seq.length-pos) );
|
||||
|
||||
|
||||
}
|
||||
|
||||
/** Convenience shortcut: takes the list of keys/offsets and pushes offsets into the lookup index for the keys that
|
||||
* do exist already, or first creates the new entry and then pushes the offset for keys that are novel. This method
|
||||
* is quiet: if <code>keys</code> is <code>null</code> or an empty list, it does nothing.
|
||||
* @param keys
|
||||
*/
|
||||
private void addToIndex(final List<PrimitivePair.Int> keys ) {
|
||||
if ( keys == null ) return;
|
||||
for ( PrimitivePair.Int key: keys ) {
|
||||
List<Integer> l = lookup.get(key.getFirst());
|
||||
if ( l == null ) {
|
||||
l = new ArrayList<Integer>();
|
||||
lookup.put(key.getFirst(),l);
|
||||
}
|
||||
l.add(key.getSecond());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts kmer (integer key) of length K into its sequence representation. Returns a sequence (over ACGT alphabet)
|
||||
* of length K that corresponds to the specified key.
|
||||
* @param K
|
||||
* @param kmer
|
||||
* @return
|
||||
*/
|
||||
public static byte [] idToSeq(int K, int kmer) {
|
||||
byte [] seq = new byte[K];
|
||||
for ( int i = K-1; i >=0 ; i-- ) {
|
||||
seq[i] = BaseUtils.baseIndexToSimpleBase(kmer & 0x3);
|
||||
kmer >>= 2;
|
||||
}
|
||||
return seq;
|
||||
}
|
||||
|
||||
/** Returns all offsets for the specified kmer (key) on the sequence indexed by this KmerIndex object. Returns
|
||||
* null if specified kmer is not present on the indexed sequence.
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets(int key) { return lookup.get(key); }
|
||||
// public List<Integer> getOffsets(byte[] seq) {
|
||||
// if ( seq.length != K ) throw new StingException("Can not perform direct lookup of a sequence with length different from key size");
|
||||
//
|
||||
// return getOffsets( toKey(seq) ) ;
|
||||
// }
|
||||
}
|
||||
|
|
@ -0,0 +1,401 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.utils;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Aug 6, 2010
|
||||
* Time: 6:18:01 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadPair {
|
||||
|
||||
public enum PairType {
|
||||
UNKNOWN,
|
||||
BOTH_UNMAPPED,
|
||||
ONE_UNMAPPED,
|
||||
PROPER,
|
||||
LEFT,
|
||||
RIGHT,
|
||||
OUTER,
|
||||
INTER
|
||||
};
|
||||
|
||||
|
||||
private SAMRecord end1 = null;
|
||||
private SAMRecord end2 = null;
|
||||
private PairType pType = PairType.UNKNOWN;
|
||||
private int leftStart = -1;
|
||||
private int rightStart = -1;
|
||||
private SAMRecord leftRead = null;
|
||||
private SAMRecord rightRead = null;
|
||||
|
||||
|
||||
|
||||
/** Creates an empty read pair object */
|
||||
public ReadPair() {}
|
||||
|
||||
/** Creates a read pair objects initialized with the specified read */
|
||||
public ReadPair(SAMRecord read) {
|
||||
addRead(read);
|
||||
}
|
||||
|
||||
/** Returns name of the paired read (it is assumed that both individual reads in the pair share same name).
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getName() { return ( end1 != null ? end1.getReadName() : (end2 != null ? end2.getReadName() : null) ); }
|
||||
|
||||
/** Returns true if both ends are recorded in this read pair object. Note that because SAM records carry
|
||||
* mate information, a pair can be (partially) initialized from one end. This method verifies that this is not the case
|
||||
* and both records are actually present.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasBothEnds() { return end1 != null && end2 != null ; }
|
||||
|
||||
/** Returns true if this pair object was initialized with at least one end. Since SAM records carry mate information,
|
||||
* it is sometimes sufficient to have only one read (fragment end) actually recorded in the pair object, at which
|
||||
* point some useful information can be retrieved for the pair already.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasAnyData() { return end1 != null || end2 != null ; }
|
||||
|
||||
/** Returns true if both ends in the pair are mapped. The pair object must be at least partially initialized (i.e.
|
||||
* it has to hold a reference to at least one end of the pair), otherwise an exception will be thrown.
|
||||
* @return
|
||||
*/
|
||||
public boolean bothEndsMapped() {
|
||||
if ( pType == PairType.UNKNOWN ) throw new StingException("ReadPair object was not initialized yet, method can not be applied");
|
||||
|
||||
if ( pType == PairType.BOTH_UNMAPPED || pType == PairType.ONE_UNMAPPED ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns true if both ends in the pair are mapped uniquely. This method requires both ends being already registered
|
||||
* in this pair object (i.e. hasBothEnds() is true), otherwise an exception will be thrown.
|
||||
* @return
|
||||
*/
|
||||
public boolean bothEndsUniquelyMapped() {
|
||||
if ( ! hasBothEnds() ) throw new StingException("Can not determine if both ends are uniquely mapped until both ends are recorded");
|
||||
return bothEndsMapped() && end1.getMappingQuality() > 0 && end2.getMappingQuality() > 0;
|
||||
}
|
||||
|
||||
/** Returns true if this pair is in proper orientation, i.e. ---> <--- on the same contig */
|
||||
public boolean isProper() { return pType == PairType.PROPER; }
|
||||
|
||||
/* Returns true if this pair is in outer orientation, i.e. <--- ---> on the same chromosome */
|
||||
public boolean isOuter() { return pType == PairType.OUTER; }
|
||||
|
||||
/** Returns left (coordinate-wise) read in the pair. Both ends need to be mapped, and they should map
|
||||
* onto the same contig, otherwise an exception will be thrown.
|
||||
* @return
|
||||
*/
|
||||
public SAMRecord getLeftRead() {
|
||||
if ( ! bothEndsMapped() || pType == PairType.INTER )
|
||||
throw new StingException("Left read can be identified only when both reads are mapped onto the same contig, and the are not for "+getName());
|
||||
if ( leftRead == null )
|
||||
throw new StingException("Left read is not recorded. Maybe we have not seen it yet? Pair: "+getName());
|
||||
return leftRead;
|
||||
}
|
||||
|
||||
/** Returns right (coordinate-wise) read in the pair. Both ends need to be mapped, and they should map
|
||||
* onto the same contig, otherwise an exception will be thrown.
|
||||
* @return
|
||||
*/
|
||||
public SAMRecord getRightRead() {
|
||||
if ( ! bothEndsMapped() || pType == PairType.INTER )
|
||||
throw new StingException("Right read can be identified only when both reads are mapped onto the same contig, and the are not for "+getName());
|
||||
if ( rightRead == null )
|
||||
throw new StingException("Right read is not recorded. Maybe we have not seen it yet? Pair: "+getName());
|
||||
return rightRead;
|
||||
}
|
||||
|
||||
public SAMRecord getEnd1() { return end1; }
|
||||
public SAMRecord getEnd2() { return end2; }
|
||||
|
||||
public PairType getPairType() { return pType ; }
|
||||
|
||||
public void addRead(SAMRecord r) {
|
||||
if ( ! r.getReadPairedFlag() ) throw new StingException("Read "+r.getReadName() +" is unpaired");
|
||||
if ( r.getFirstOfPairFlag() ) {
|
||||
if ( end1 != null ) throw new StingException("Read "+r.getReadName()+" is first of pair and the pair already has first read recorded");
|
||||
end1 = r;
|
||||
if ( end2 != null && ! end1.getReadName().equals(end2.getReadName()) )
|
||||
throw new StingException("The pair already has read "+end2.getReadName() +"; the read being added does not match by name ("+r.getReadName()+")" );
|
||||
} else {
|
||||
if ( r.getSecondOfPairFlag() ) {
|
||||
if ( end2 != null ) throw new StingException("Read "+r.getReadName()+" is second of pair and the pair already has second read recorded");
|
||||
end2 = r;
|
||||
if ( end1 != null && ! end1.getReadName().equals(end2.getReadName()) )
|
||||
throw new StingException("The pair already has read "+end1.getReadName() +"; the read being added does not match by name ("+r.getReadName()+")" );
|
||||
} else {
|
||||
throw new StingException("The read "+r.getReadName()+" is marked as paired, but the first/second of pair flag is not set");
|
||||
}
|
||||
}
|
||||
setPairInfo(r);
|
||||
}
|
||||
|
||||
/** If pair type has not been set yet, then sets it to <code>t</code>. Otherwise (pair type already set),
|
||||
* just checks if the pair type is <code>t</t>. If it is, the method returns quietly; if it is not (inconsistency detected),
|
||||
* throws an exception.
|
||||
*
|
||||
*/
|
||||
private void setCheckPairType(PairType t) {
|
||||
if ( pType != PairType.UNKNOWN ) {
|
||||
if ( pType != t )
|
||||
throw new StingException("In pair "+getName()+" two ends provide conflicting alignment information");
|
||||
} else pType = t;
|
||||
}
|
||||
|
||||
private void setCheckLeftStart(int pos) {
|
||||
if ( leftStart >= 0 ) {
|
||||
if ( leftStart != pos )
|
||||
throw new StingException("In pair "+getName()+" two ends provide conflicting alignment information");
|
||||
} else leftStart = pos;
|
||||
}
|
||||
|
||||
private void setCheckRightStart(int pos) {
|
||||
if ( rightStart >= 0 ) {
|
||||
if ( rightStart != pos )
|
||||
throw new StingException("In pair "+getName()+" two ends provide conflicting alignment information");
|
||||
} else rightStart = pos;
|
||||
}
|
||||
|
||||
private void setPairInfo(SAMRecord read) {
|
||||
|
||||
setCheckPairType(getPairType(read));
|
||||
|
||||
// there is nothing left to do unless both ends are mapped onto the same contig:
|
||||
if ( pType == PairType.INTER ) return;
|
||||
|
||||
if ( pType == PairType.ONE_UNMAPPED ) {
|
||||
// set putative left or right read depending on the orientation of the only mapped mate
|
||||
if ( ! AlignmentUtils.isReadUnmapped(read ) ) {
|
||||
// we can set left/right read only if it is the current read that is mapped; if we have the
|
||||
// unmapped mate, skip and wait for the mapped read to come!
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
setCheckRightStart(read.getAlignmentStart());
|
||||
if ( rightRead != null ) throw new StingException("Right read was already set for the pair");
|
||||
rightRead = read;
|
||||
} else {
|
||||
setCheckLeftStart(read.getAlignmentStart());
|
||||
if ( leftRead != null ) throw new StingException("Left read was already set for the pair");
|
||||
leftRead = read;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// we are here if both ends are mapped and they map onto the same contig
|
||||
if ( read.getAlignmentStart() < read.getMateAlignmentStart() ) { //left/right = read/mate
|
||||
|
||||
setCheckLeftStart(read.getAlignmentStart());
|
||||
setCheckRightStart(read.getMateAlignmentStart());
|
||||
|
||||
if ( leftRead != null ) throw new StingException("Left read was already set for the pair");
|
||||
leftRead = read;
|
||||
} else {
|
||||
// left/right = mate/read
|
||||
|
||||
setCheckLeftStart(read.getMateAlignmentStart());
|
||||
setCheckRightStart(read.getAlignmentStart());
|
||||
|
||||
if ( rightRead != null ) throw new StingException("Right read was already set for the pair");
|
||||
rightRead = read;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns pair type that describes this read and its mate. The alignment information for both the read itself
|
||||
* and its mate is taken from the read's sam record passed as the argument, so the mate information is expected to be
|
||||
* correctly set!
|
||||
* @param read
|
||||
* @return
|
||||
*/
|
||||
public static PairType getPairType(SAMRecord read) {
|
||||
|
||||
if ( AlignmentUtils.isReadUnmapped(read) ) {
|
||||
if ( AlignmentUtils.isMateUnmapped(read) ) return PairType.BOTH_UNMAPPED;
|
||||
else return PairType.ONE_UNMAPPED;
|
||||
}
|
||||
|
||||
return getWouldBePairType(read,read.getReferenceIndex(),read.getAlignmentStart(),read.getReadNegativeStrandFlag());
|
||||
}
|
||||
|
||||
/** Returns pair type that would describe this read and its mate, if this read mapped onto refId:start in orientation
|
||||
* given by rc (forward is rc=false, reverse is rc=true). The read's alignment information (if any,
|
||||
* unmapped reads are allowed) present in the SAM record is completely ignored by this method,
|
||||
* only mate's information is used.
|
||||
* @param read
|
||||
* @param refId
|
||||
* @param start
|
||||
* @param rc
|
||||
* @return
|
||||
*/
|
||||
public static PairType getWouldBePairType(SAMRecord read, int refId, int start, boolean rc) {
|
||||
|
||||
|
||||
if ( AlignmentUtils.isMateUnmapped(read) ) return PairType.ONE_UNMAPPED ;
|
||||
|
||||
// both read and mate are mapped:
|
||||
|
||||
if ( refId != read.getMateReferenceIndex() ) return PairType.INTER;
|
||||
|
||||
// both read and its mate map onto the same chromosome
|
||||
|
||||
if ( start < read.getMateAlignmentStart() ) { //left/right = read/mate
|
||||
|
||||
if ( rc ) {
|
||||
if ( read.getMateNegativeStrandFlag() ) return PairType.LEFT;
|
||||
else return PairType.OUTER;
|
||||
} else {
|
||||
if ( read.getMateNegativeStrandFlag() ) return PairType.PROPER;
|
||||
else return PairType.RIGHT;
|
||||
}
|
||||
} else {
|
||||
// left/right = mate/read
|
||||
|
||||
if ( rc ) {
|
||||
if ( read.getMateNegativeStrandFlag() ) return PairType.LEFT;
|
||||
else return PairType.PROPER;
|
||||
} else {
|
||||
if ( read.getMateNegativeStrandFlag() ) return PairType.OUTER;
|
||||
else return PairType.RIGHT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getLeftStart() {
|
||||
if ( ! hasAnyData() ) throw new StingException("ReadPair object was not initialized yet, method can not be applied");
|
||||
return leftStart;
|
||||
}
|
||||
|
||||
public int getRightStart() {
|
||||
if ( ! hasAnyData() ) throw new StingException("ReadPair object was not initialized yet, method can not be applied");
|
||||
return rightStart;
|
||||
}
|
||||
|
||||
public int getFragmentSize() {
|
||||
if ( ! hasBothEnds() ) throw new StingException("Can not determine fragment size: pair object does not have both ends yet");
|
||||
if ( ! bothEndsMapped() ) throw new StingException("Can not determine fragment size: both ends must be mapped");
|
||||
if ( pType != PairType.PROPER ) throw new StingException("The pais is not in proper orientation, can not determine fragment size");
|
||||
|
||||
return getFragmentSize(leftRead,rightRead);
|
||||
}
|
||||
|
||||
/** Given a read (that must belong to this pair), returns the other end in the pair if it is already
|
||||
* recorded, or null otherwise.
|
||||
* @param read
|
||||
* @return
|
||||
*/
|
||||
public SAMRecord getOtherEnd(SAMRecord read) {
|
||||
if ( read.getFirstOfPairFlag() ) return end2;
|
||||
else {
|
||||
if ( read.getSecondOfPairFlag() ) return end1;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static int getFragmentSize(SAMRecord left, SAMRecord right) {
|
||||
|
||||
if ( left == null || right == null ||
|
||||
AlignmentUtils.isReadUnmapped(left) || AlignmentUtils.isReadUnmapped(right) ) {
|
||||
throw new StingException("No read (null) or unmapped read provided: fragment size is not defined");
|
||||
}
|
||||
if ( left.getReferenceIndex() != right.getReferenceIndex() ) {
|
||||
throw new StingException("Left/right reads map onto different contigs: fragment size is not defined");
|
||||
}
|
||||
|
||||
int fragment_length = left.getReadLength(); // fragment is at least as long as the left read, duh!
|
||||
int leftEnd = left.getAlignmentEnd();
|
||||
int rightStart = right.getAlignmentStart();
|
||||
|
||||
if ( rightStart > leftEnd ) {
|
||||
// if reads are not overlapping, fragment length is lengths of both reads plus the distance (gap) between
|
||||
// the reads. Note that if the sequence between the reads happens to have insirtions or deletions,
|
||||
// our estimation of the actual distance between the reads (on the fragment) is incorrect, but we
|
||||
// can not do better given just those reads. This estimation is, in particular, incorrect
|
||||
// for left reads ending with 'I' and/or right reads starting with 'I'
|
||||
//
|
||||
// left right
|
||||
// -------->...gap...<-------- fragment = left+gap+right
|
||||
|
||||
return left.getReadLength() + right.getReadLength() + (rightStart - leftEnd-1);
|
||||
}
|
||||
|
||||
// if we are here, the reads do overlap; fragment length is lengths of the two reads less the overlap.
|
||||
// in this case we can compute the actual overlap between the reads (on the fragment) taking into
|
||||
// account indels, if any
|
||||
//
|
||||
// left **** right
|
||||
// ------------> ****=overlap; fragment = left+right - overlap
|
||||
// <--------------
|
||||
//
|
||||
// with deletion:
|
||||
//
|
||||
// left ** ** right
|
||||
// -----------ddd-> ****=overlap; fragment = left+right - overlap
|
||||
// <-ddd------------- note that overlap != leftEnd - rightStart+1
|
||||
// instead, overlap = leftEnd-rightStart+1- length(D)
|
||||
// with insertion:
|
||||
//
|
||||
// left ******* right ******* = overlap; fragment = left+right - overlap
|
||||
// -------------iii-> note that overlap != leftEnd - rightStart +1
|
||||
// <-iii-------------- instead, overlap = leftEnd - rightStart +1 + length(I)
|
||||
// (since 'i' bases are NOT on the ref)
|
||||
|
||||
int posOnRef = rightStart;
|
||||
// int posOnRightRead = 0;
|
||||
|
||||
int overlap = leftEnd - rightStart + 1 ;
|
||||
|
||||
for(CigarElement ce : left.getCigar().getCigarElements() ) {
|
||||
switch(ce.getOperator()) {
|
||||
case S:
|
||||
case H:
|
||||
// posOnRightRead+=ce.getLength();
|
||||
break;
|
||||
case I:
|
||||
overlap += ce.getLength();
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
overlap -= ce.getLength();
|
||||
case M:
|
||||
posOnRef += ce.getLength();
|
||||
break;
|
||||
default:
|
||||
}
|
||||
if ( posOnRef > leftEnd ) break; // we need to examine only overlapping part of the reads
|
||||
}
|
||||
return left.getReadLength() + right.getReadLength() - overlap;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.ReadPair;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.AlignmentInfo;
|
||||
import org.broadinstitute.sting.oneoffprojects.utils.Assembly;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 13, 2010
|
||||
* Time: 12:45:42 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
@WalkerName("DetectWGA")
|
||||
@Requires(value={DataSource.REFERENCE_BASES})
|
||||
|
||||
public class DetectWGAWalker extends ReadWalker<Integer,Integer> {
|
||||
|
||||
private int TIP_LENGTH = 10;
|
||||
private int TIP_MM_THRESHOLD = 1;
|
||||
private double TIP_AV_QUAL_THRESHOLD = 15.0;
|
||||
|
||||
private boolean DEBUG = true;
|
||||
|
||||
Map<String, ReadPair> pairCache = null;
|
||||
Map<String,MathUtils.RunningAverage> fragmentSizeMap = null; // by library
|
||||
private ReferenceDataSource refData;
|
||||
private byte[] refBases;
|
||||
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile);
|
||||
pairCache = new HashMap<String, ReadPair>();
|
||||
fragmentSizeMap = new HashMap<String,MathUtils.RunningAverage>();
|
||||
}
|
||||
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
|
||||
if ( ! read.getReadPairedFlag() ) return 0; // for now!!
|
||||
|
||||
// read is paired
|
||||
|
||||
cacheReadAsPair(read);
|
||||
|
||||
/*
|
||||
// if the read is already mapped (uniquely), we check if it may have the "colored tip" artifact on either side:
|
||||
if ( AlignmentUtils.isReadUniquelyMapped(read) ) {
|
||||
|
||||
TipInfo tips = countTipMismatches(read,TIP_LENGTH);
|
||||
|
||||
if ( tips.leftMM() >= TIP_MM_THRESHOLD || tips.rightMM() >= TIP_MM_THRESHOLD ) {
|
||||
if ( DEBUG ) {
|
||||
out.println(" Read "+read.getReadName()+ " has "+tips.leftMM()+"/"+tips.rightMM()+" mismatches in the tips");
|
||||
out.println(" Pair orientation: "+pair.getPairType());
|
||||
}
|
||||
// try adding read to existing assemblies:
|
||||
AlignmentInfo al = alignToAllAddToBest(read,Math.min(3,tips.leftMM()+tips.rightMM())-1);
|
||||
if ( al == null ) {
|
||||
if ( tips.leftMM() >= TIP_MM_THRESHOLD && tips.leftQ() >= TIP_AV_QUAL_THRESHOLD ||
|
||||
tips.rightMM() >= TIP_MM_THRESHOLD && tips.rightQ() >= TIP_AV_QUAL_THRESHOLD ) {
|
||||
if ( DEBUG ) out.println(" Initialized new assembly.") ;
|
||||
Assembly a = new Assembly(read.getReadBases(),read.getReadName(),read.getAlignmentStart());
|
||||
tryAndAddUnmapped(a); // see if we got unmapped reads that would align nicely
|
||||
assemblies.add(a);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/** little helper: if we already cached the pair object for this read, just add the read to that object; if we did not - instantiate
|
||||
* new pair objetc first and register it in the map, then add the read; this method also updates other cache(s)/trackers as needed, e.g.
|
||||
* fragment size map
|
||||
*/
|
||||
private void cacheReadAsPair(SAMRecord read) {
|
||||
ReadPair pair = pairCache.get( read.getReadName() );
|
||||
if ( pair == null ) {
|
||||
pair = new ReadPair(read);
|
||||
pairCache.put(read.getReadName(),pair);
|
||||
}
|
||||
|
||||
pair.addRead(read);
|
||||
|
||||
// if it's a good pair, add its fragment size to the stats:
|
||||
if ( pair.hasBothEnds() && pair.bothEndsMapped() && pair.isProper() ) {
|
||||
String lib = read.getReadGroup().getLibrary();
|
||||
MathUtils.RunningAverage fSize = fragmentSizeMap.get(lib);
|
||||
if ( fSize == null ) {
|
||||
fSize = new MathUtils.RunningAverage();
|
||||
fragmentSizeMap.put(lib,fSize);
|
||||
}
|
||||
fSize.add(pair.getFragmentSize());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private TipInfo countTipMismatches(SAMRecord read, int tip_length) {
|
||||
|
||||
AlignmentUtils.MismatchCount left_mm = AlignmentUtils.getMismatchCount(read,refBases,read.getAlignmentStart()-1,0,tip_length);
|
||||
|
||||
int right_start = read.getReadLength()-tip_length;
|
||||
AlignmentUtils.MismatchCount right_mm = AlignmentUtils.getMismatchCount(read,refBases,read.getAlignmentStart()-1,right_start,read.getReadLength()-right_start);
|
||||
|
||||
return new TipInfo(left_mm,right_mm);
|
||||
}
|
||||
|
||||
class TipInfo {
|
||||
AlignmentUtils.MismatchCount left_mm;
|
||||
AlignmentUtils.MismatchCount right_mm;
|
||||
double left_avQ;
|
||||
double right_avQ;
|
||||
|
||||
public TipInfo(AlignmentUtils.MismatchCount l,AlignmentUtils.MismatchCount r) {
|
||||
left_mm = l;
|
||||
right_mm = r;
|
||||
left_avQ = (l.numMismatches ==0 ? 0 : ((double)l.mismatchQualities)/l.numMismatches );
|
||||
right_avQ = (r.numMismatches ==0 ? 0 : ((double)r.mismatchQualities)/r.numMismatches );
|
||||
}
|
||||
|
||||
public int leftMM() { return left_mm.numMismatches; }
|
||||
public int rightMM() { return right_mm.numMismatches; }
|
||||
public double leftQ() { return left_avQ; }
|
||||
public double rightQ() { return right_avQ; }
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -363,7 +363,13 @@ public class PairMaker extends CommandLineProgram {
|
|||
}
|
||||
}
|
||||
if ( best.size() == 0 ) return null; // no unique alignment
|
||||
if ( best.size() > 1 ) throw new RuntimeException("Multiple alignments for read "+l.get(0).getReadName()+", all with Q>="+minq);
|
||||
if ( best.size() > 1 ) {
|
||||
for ( SAMRecord r : best ) {
|
||||
System.out.println("READ "+r.getReadName()+" mapQ="+r.getMappingQuality()+" at="+r.getReferenceName()+
|
||||
":"+r.getAlignmentStart()+"("+(r.getReadNegativeStrandFlag()?"-":"+")+") cig="+r.getCigarString());
|
||||
}
|
||||
throw new RuntimeException("Multiple alignments for read "+l.get(0).getReadName()+", all with Q>="+minq);
|
||||
}
|
||||
|
||||
return best.get(0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Oct 7, 2010
|
||||
* Time: 2:40:02 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/** This class provides an adapter to Iterator<GenomeLoc> that returns only (parts of) underlying iterator's
|
||||
* intervals overlapping with specified "master set" of bounding intervals. The underlying iterator must return
|
||||
* NON-overlapping intervals in coordinate-sorted order, otherwise the behavior is unspecified. If the master set is represented by
|
||||
* another interval iterator, it should return sorted and NON-overlapping intervals.
|
||||
*
|
||||
*/
|
||||
public class OverlappingIntervalIterator implements Iterator<GenomeLoc> {
|
||||
PushbackIterator<GenomeLoc> iter = null;
|
||||
PushbackIterator<GenomeLoc> boundBy = null;
|
||||
|
||||
GenomeLoc prefetchedOverlap = null;
|
||||
GenomeLoc currentBound = null;
|
||||
GenomeLoc currentInterval = null;
|
||||
|
||||
/** Creates new overlapping iterator that will internally traverse <code>intervals</code> and return only
|
||||
* overlaps of those with set of intervals returned by <code>boundBy</code>.
|
||||
* @param intervals
|
||||
* @param boundBy
|
||||
*/
|
||||
public OverlappingIntervalIterator(Iterator<GenomeLoc> intervals, Iterator<GenomeLoc> boundBy) {
|
||||
this.iter = new PushbackIterator(intervals);
|
||||
this.boundBy = new PushbackIterator(boundBy);
|
||||
|
||||
if ( iter.hasNext() && boundBy.hasNext() ) {
|
||||
GenomeLoc currentInterval = iter.next(); // load first interval
|
||||
GenomeLoc currentBound = boundBy.next(); // load first bounding interval
|
||||
fetchNextOverlap();
|
||||
}
|
||||
}
|
||||
|
||||
/** Traverses both iterators in sync, until the first overlap between the two is reached. If no overlap is found
|
||||
* until the end of the either of the two streams, leaves prefetchedOverlap set to null
|
||||
*/
|
||||
private void fetchNextOverlap() {
|
||||
|
||||
prefetchedOverlap = null;
|
||||
|
||||
while ( prefetchedOverlap == null ) {
|
||||
|
||||
if ( currentInterval.isBefore(currentBound) ) {
|
||||
if ( ! iter.hasNext() ) break; // no more intervals left; we are done
|
||||
currentInterval = iter.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( currentInterval.isPast(currentBound) ) {
|
||||
if ( ! boundBy.hasNext() ) break; // we are past the last available bounding interval, we are done!
|
||||
currentBound = boundBy.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
// we are at this point only if currentInterval overlaps with currentBound
|
||||
|
||||
prefetchedOverlap = currentInterval.intersect(currentBound);
|
||||
|
||||
// we still do not know if we are done with either current interval or current bound, because
|
||||
// two special situations are possible:
|
||||
//
|
||||
// 1) next interval overlaps with 2) current interval also overlaps with
|
||||
// the same bounding interval; next bounding interval; note that
|
||||
// note that in this case next in this case next bound necessarily
|
||||
// interval necessarily starts before starts before the next interval
|
||||
// the next bound
|
||||
//
|
||||
// curr. int next int. curr. int
|
||||
// ----- ------ --------------------------
|
||||
// ------------------- --------- -------------
|
||||
// curr. bound curr. bound next bound
|
||||
|
||||
// To solve this issue we update either only currentInterval or only currentBound to their next value,
|
||||
// whichever comes first; the rest of the traversal to the next overlap will be performed on the next invocation of
|
||||
// fetchNextOverlap().
|
||||
|
||||
if ( ! iter.hasNext() ) {
|
||||
|
||||
}
|
||||
GenomeLoc nextInterval = iter.next();
|
||||
GenomeLoc nextBound = boundBy.next();
|
||||
|
||||
if ( nextInterval.startsBefore(nextBound)) {
|
||||
currentInterval = nextInterval;
|
||||
boundBy.pushback(nextBound); // in case next interval overlaps with the current bound
|
||||
} else {
|
||||
currentBound = nextBound;
|
||||
iter.pushback(nextInterval); // in case current interval also overlaps with the next bound
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> if the iteration has more elements. (In other
|
||||
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
|
||||
* rather than throwing an exception.)
|
||||
*
|
||||
* @return <tt>true</tt> if the iterator has more elements.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return false; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next element in the iteration.
|
||||
*
|
||||
* @return the next element in the iteration.
|
||||
* @throws java.util.NoSuchElementException
|
||||
* iteration has no more elements.
|
||||
*/
|
||||
public GenomeLoc next() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes from the underlying collection the last element returned by the
|
||||
* iterator (optional operation). This method can be called only once per
|
||||
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
|
||||
* the underlying collection is modified while the iteration is in
|
||||
* progress in any way other than by calling this method.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the <tt>remove</tt>
|
||||
* operation is not supported by this Iterator.
|
||||
* @throws IllegalStateException if the <tt>next</tt> method has not
|
||||
* yet been called, or the <tt>remove</tt> method has already
|
||||
* been called after the last call to the <tt>next</tt>
|
||||
* method.
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove() method is not supported by OverlappingIntervalIterator");
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ import java.util.Arrays;
|
|||
|
||||
public class AlignmentUtils {
|
||||
|
||||
private static class MismatchCount {
|
||||
public static class MismatchCount {
|
||||
public int numMismatches = 0;
|
||||
public long mismatchQualities = 0;
|
||||
}
|
||||
|
|
@ -98,13 +98,13 @@ public class AlignmentUtils {
|
|||
return numMismatches(r, StringUtil.stringToBytes(refSeq), refIndex);
|
||||
}
|
||||
|
||||
private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
|
||||
public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
|
||||
return getMismatchCount(r,refSeq,refIndex,0,r.getReadLength());
|
||||
}
|
||||
|
||||
// todo -- this code and mismatchesInRefWindow should be combined and optimized into a single
|
||||
// todo -- high performance implementation. We can do a lot better than this right now
|
||||
private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
|
||||
public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
|
||||
MismatchCount mc = new MismatchCount();
|
||||
|
||||
int readIdx = 0;
|
||||
|
|
@ -387,6 +387,15 @@ public class AlignmentUtils {
|
|||
return true;
|
||||
}
|
||||
|
||||
/** Returns true is read is mapped and mapped uniquely (Q>0).
|
||||
*
|
||||
* @param read
|
||||
* @return
|
||||
*/
|
||||
public static boolean isReadUniquelyMapped(SAMRecord read) {
|
||||
return ( ! AlignmentUtils.isReadUnmapped(read) ) && read.getMappingQuality() > 0;
|
||||
}
|
||||
|
||||
/** Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from
|
||||
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
|
||||
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
|
||||
|
|
|
|||
Loading…
Reference in New Issue