Clean up emergency fixes for out-of-bounds issues in reference retrieval. Fix spelling mistakes.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1173 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-07-06 15:41:30 +00:00
parent 000d92a545
commit d19366eaad
7 changed files with 93 additions and 28 deletions

View File

@ -70,17 +70,12 @@ public class LocusReferenceView extends ReferenceView {
/**
* Allow the user to pull reference info from any arbitrary region of the reference.
* Assume the user has already performed all necessary bounds checking.
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
* NOTE(review): this listing looks like a flattened diff hunk whose +/- markers were dropped, so both the
* inline implementation and the delegating return appear in the body below -- confirm against the repository
* which of the two is the current implementation.
* @param genomeLoc The locus.
* @return A char array of the bases starting at the start of the locus (inclusive) and ending
* at the end of the locus (inclusive).
*/
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
// Look up the sequence dictionary entry for the contig named by the locus.
SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
// Clip the requested stop so it never exceeds the length of the contig.
long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );
// Fetch the bases from the locus start through the clipped stop.
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
// Append one 'X' for every requested position that lies beyond the clipped stop.
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
// Hand the request to the superclass implementation of getReferenceBases.
return super.getReferenceBases(genomeLoc);
}
/**

View File

@ -5,6 +5,8 @@ import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.StringUtil;
import net.sf.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -62,17 +64,7 @@ public class ReadReferenceView extends ReferenceView {
public char[] getReferenceBases( SAMRecord read ) {
// Returns the reference bases covering the read's alignment, or null when the read's unmapped flag is set.
// NOTE(review): this listing looks like a flattened diff hunk whose +/- markers were dropped, so both the
// inline clipping logic and the delegating return appear below -- confirm against the repository.
if (read.getReadUnmappedFlag())
return null;
// Take the contig name and the alignment start/end from the read itself.
String contig = read.getReferenceName();
int start = read.getAlignmentStart();
int stop = read.getAlignmentEnd();
SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig);
// Clip the stop to the contig length when the alignment end runs past it.
if (stop > sequenceRecord.getSequenceLength())
stop = sequenceRecord.getSequenceLength();
ReferenceSequence alignmentToReference = reference.getSubsequenceAt(contig, start, stop);
// Append one 'X' for every alignment position that lies beyond the clipped stop.
return ( StringUtil.bytesToString(alignmentToReference.getBases()) + Utils.dupString('X', read.getAlignmentEnd() - stop) ).toCharArray();
// Convert the read to a GenomeLoc and reuse the GenomeLoc-based overload.
return getReferenceBases( GenomeLocParser.createGenomeLoc(read) );
}
}

View File

@ -1,9 +1,15 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.Collections;
import java.util.Collection;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.StringUtil;
import net.sf.picard.reference.ReferenceSequence;
/**
* User: hanna
* Date: May 22, 2009
@ -46,4 +52,18 @@ public class ReferenceView implements View {
public void close() {
// Null out the field so this view no longer holds on to the reference.
reference = null;
}
/**
 * Fetches the reference bases spanned by the given locus.
 * When the locus stops beyond the end of its contig, the stop is clipped to the contig length
 * and every position past that point is reported as an 'X' in the returned array.
 * @param genomeLoc The locus whose bases are wanted.
 * @return A char array holding the bases from the start of the locus (inclusive) to the stop
 *         of the locus (inclusive), padded with 'X' past the end of the contig.
 */
protected char[] getReferenceBases( GenomeLoc genomeLoc ) {
    // Dictionary entry for the contig; its length bounds what can actually be fetched.
    final SAMSequenceRecord contigRecord =
            reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
    final long clippedStop = Math.min( genomeLoc.getStop(), contigRecord.getSequenceLength() );

    // Bases that really exist in the reference for this locus.
    final ReferenceSequence available =
            reference.getSubsequenceAt(genomeLoc.getContig(), genomeLoc.getStart(), clippedStop);
    final String realBases = StringUtil.bytesToString(available.getBases());

    // One 'X' for each requested position that falls past the clipped stop.
    final int missingCount = (int)(genomeLoc.getStop() - clippedStop);
    final String padding = Utils.dupString('X', missingCount);

    return (realBases + padding).toCharArray();
}
}

View File

@ -58,7 +58,7 @@ abstract public class BasicPileup implements Pileup {
}
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> bases = new ArrayList(reads.size());
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
@ -68,7 +68,7 @@ abstract public class BasicPileup implements Pileup {
}
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> quals = new ArrayList(reads.size());
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
@ -79,7 +79,7 @@ abstract public class BasicPileup implements Pileup {
}
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
ArrayList<Byte> quals = new ArrayList(reads.size());
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
byte qual = (byte)read.getMappingQuality();
@ -349,10 +349,10 @@ abstract public class BasicPileup implements Pileup {
public static byte consensusBase(String bases) {
String canon = bases.toUpperCase();
int ACount = Utils.countOccurances('A', bases);
int CCount = Utils.countOccurances('C', bases);
int TCount = Utils.countOccurances('T', bases);
int GCount = Utils.countOccurances('G', bases);
int ACount = Utils.countOccurrences('A', bases);
int CCount = Utils.countOccurrences('C', bases);
int TCount = Utils.countOccurrences('T', bases);
int GCount = Utils.countOccurrences('G', bases);
int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount)));
if ( ACount == m ) return 'A';

View File

@ -437,12 +437,11 @@ public class Utils {
/**
 * Builds a string made of a single character repeated a given number of times.
 *
 * @param c       the character to repeat.
 * @param nCopies how many copies of {@code c} the result contains; zero yields the empty string.
 * @return a string of length {@code nCopies} in which every character is {@code c}.
 * @throws NegativeArraySizeException if {@code nCopies} is negative.
 */
public static String dupString( char c, int nCopies ) {
    char[] chars = new char[nCopies];
    // Arrays.fill populates the whole buffer on its own; a separate manual loop would only repeat the work.
    Arrays.fill(chars, c);
    return new String(chars);
}
public static int countOccurances(char c, String s) {
public static int countOccurrences(char c, String s) {
int count = 0;
for ( int i = 0; i < s.length(); i++ ) {
count += s.charAt(i) == c ? 1 : 0;

View File

@ -247,7 +247,7 @@ public class DupUtils {
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
byte combinedBase = BasicPileup.consensusBase(bases);
int baseMatches = Utils.countOccurances((char)combinedBase, bases);
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
double mismatchRate = (double)baseMatches / bases.length();
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import org.junit.Test;
import junit.framework.Assert;
/**
 * Unit tests for the general-purpose helpers in the Utils class.
 *
 * @author hanna
 * @version 0.1
 */
public class UtilsTest {
    /** Asking for zero copies must give back an empty string. */
    @Test
    public void testDupStringNoChars() {
        final int observedLength = Utils.dupString('a',0).length();
        Assert.assertEquals("dupString did not produce zero-length string", 0, observedLength);
    }

    /** Asking for one copy must give back exactly that one character. */
    @Test
    public void testDupStringOneChar() {
        final String repeated = Utils.dupString('b',1);
        Assert.assertEquals("dupString did not produce single character string", 1, repeated.length());
        Assert.assertEquals("dupString character was incorrect", 'b', repeated.charAt(0));
    }

    /** Asking for several copies must give back the character repeated that many times. */
    @Test
    public void testDupStringMultiChar() {
        final String repeated = Utils.dupString('c',5);
        Assert.assertEquals("dupString did not produce five character string", 5, repeated.length());
        Assert.assertEquals("dupString string was incorrect", "ccccc", repeated);
    }
}