Clean up emergency fixes for out-of-bounds issues in reference retrieval. Fix spelling mistakes.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1173 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
000d92a545
commit
d19366eaad
|
|
@ -70,17 +70,12 @@ public class LocusReferenceView extends ReferenceView {
|
|||
|
||||
/**
|
||||
* Allow the user to pull reference info from any arbitrary region of the reference.
|
||||
* Assume the user has already performed all necessary bounds checking.
|
||||
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
|
||||
* @param genomeLoc The locus.
|
||||
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
||||
* at the end of the locus (inclusive).
|
||||
*/
|
||||
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||
SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
|
||||
long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );
|
||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
||||
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
||||
return super.getReferenceBases(genomeLoc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMSequenceRecord;
|
|||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -62,17 +64,7 @@ public class ReadReferenceView extends ReferenceView {
|
|||
public char[] getReferenceBases( SAMRecord read ) {
|
||||
if (read.getReadUnmappedFlag())
|
||||
return null;
|
||||
|
||||
String contig = read.getReferenceName();
|
||||
int start = read.getAlignmentStart();
|
||||
int stop = read.getAlignmentEnd();
|
||||
|
||||
SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig);
|
||||
if (stop > sequenceRecord.getSequenceLength())
|
||||
stop = sequenceRecord.getSequenceLength();
|
||||
|
||||
ReferenceSequence alignmentToReference = reference.getSubsequenceAt(contig, start, stop);
|
||||
return ( StringUtil.bytesToString(alignmentToReference.getBases()) + Utils.dupString('X', read.getAlignmentEnd() - stop) ).toCharArray();
|
||||
return getReferenceBases( GenomeLocParser.createGenomeLoc(read) );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 22, 2009
|
||||
|
|
@ -46,4 +52,18 @@ public class ReferenceView implements View {
|
|||
public void close() {
|
||||
reference = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the user to pull reference info from any arbitrary region of the reference.
|
||||
* If parts of the reference don't exist, mark them in the char array with 'X'es.
|
||||
* @param genomeLoc The locus.
|
||||
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
||||
* at the end of the locus (inclusive).
|
||||
*/
|
||||
protected char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||
SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
|
||||
long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );
|
||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
||||
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ abstract public class BasicPileup implements Pileup {
|
|||
}
|
||||
|
||||
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> bases = new ArrayList(reads.size());
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
|
@ -68,7 +68,7 @@ abstract public class BasicPileup implements Pileup {
|
|||
}
|
||||
|
||||
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> quals = new ArrayList(reads.size());
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
|
@ -79,7 +79,7 @@ abstract public class BasicPileup implements Pileup {
|
|||
}
|
||||
|
||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||
ArrayList<Byte> quals = new ArrayList(reads.size());
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
byte qual = (byte)read.getMappingQuality();
|
||||
|
|
@ -349,10 +349,10 @@ abstract public class BasicPileup implements Pileup {
|
|||
|
||||
public static byte consensusBase(String bases) {
|
||||
String canon = bases.toUpperCase();
|
||||
int ACount = Utils.countOccurances('A', bases);
|
||||
int CCount = Utils.countOccurances('C', bases);
|
||||
int TCount = Utils.countOccurances('T', bases);
|
||||
int GCount = Utils.countOccurances('G', bases);
|
||||
int ACount = Utils.countOccurrences('A', bases);
|
||||
int CCount = Utils.countOccurrences('C', bases);
|
||||
int TCount = Utils.countOccurrences('T', bases);
|
||||
int GCount = Utils.countOccurrences('G', bases);
|
||||
|
||||
int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount)));
|
||||
if ( ACount == m ) return 'A';
|
||||
|
|
|
|||
|
|
@ -437,12 +437,11 @@ public class Utils {
|
|||
|
||||
public static String dupString( char c, int nCopies ) {
|
||||
char[] chars = new char[nCopies];
|
||||
for ( int i = 0; i < nCopies; i++ ) chars[i] = c;
|
||||
//System.out.printf("chars is %s%n", new String(chars));
|
||||
Arrays.fill(chars,c);
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
public static int countOccurances(char c, String s) {
|
||||
public static int countOccurrences(char c, String s) {
|
||||
int count = 0;
|
||||
for ( int i = 0; i < s.length(); i++ ) {
|
||||
count += s.charAt(i) == c ? 1 : 0;
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ public class DupUtils {
|
|||
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
|
||||
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
||||
byte combinedBase = BasicPileup.consensusBase(bases);
|
||||
int baseMatches = Utils.countOccurances((char)combinedBase, bases);
|
||||
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
|
||||
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
|
||||
double mismatchRate = (double)baseMatches / bases.length();
|
||||
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.junit.Test;
|
||||
import junit.framework.Assert;
|
||||
|
||||
/**
|
||||
* Testing framework for general purpose utilities class.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class UtilsTest {
|
||||
|
||||
@Test
|
||||
public void testDupStringNoChars() {
|
||||
String duped = Utils.dupString('a',0);
|
||||
Assert.assertEquals("dupString did not produce zero-length string", 0, duped.length());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDupStringOneChar() {
|
||||
String duped = Utils.dupString('b',1);
|
||||
Assert.assertEquals("dupString did not produce single character string", 1, duped.length());
|
||||
Assert.assertEquals("dupString character was incorrect", 'b', duped.charAt(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDupStringMultiChar() {
|
||||
String duped = Utils.dupString('c',5);
|
||||
Assert.assertEquals("dupString did not produce five character string", 5, duped.length());
|
||||
Assert.assertEquals("dupString string was incorrect", "ccccc", duped);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue