Clean up emergency fixes for out-of-bounds issues in reference retrieval. Fix spelling mistakes.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1173 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
000d92a545
commit
d19366eaad
|
|
@ -70,17 +70,12 @@ public class LocusReferenceView extends ReferenceView {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allow the user to pull reference info from any arbitrary region of the reference.
|
* Allow the user to pull reference info from any arbitrary region of the reference.
|
||||||
* Assume the user has already performed all necessary bounds checking.
|
|
||||||
* TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality.
|
|
||||||
* @param genomeLoc The locus.
|
* @param genomeLoc The locus.
|
||||||
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
* @return A list of the bases starting at the start of the locus (inclusive) and ending
|
||||||
* at the end of the locus (inclusive).
|
* at the end of the locus (inclusive).
|
||||||
*/
|
*/
|
||||||
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
public char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||||
SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
|
// New side: delegate to the protected ReferenceView implementation; this override only widens visibility to public.
return super.getReferenceBases(genomeLoc);
|
||||||
long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );
|
|
||||||
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
|
||||||
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import net.sf.samtools.SAMSequenceRecord;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -62,17 +64,7 @@ public class ReadReferenceView extends ReferenceView {
|
||||||
// Fetches the reference bases for a read; an unmapped read yields null.
public char[] getReferenceBases( SAMRecord read ) {
|
public char[] getReferenceBases( SAMRecord read ) {
|
||||||
if (read.getReadUnmappedFlag())
|
if (read.getReadUnmappedFlag())
|
||||||
return null;
|
return null;
|
||||||
|
// New side: build a GenomeLoc from the read and reuse the GenomeLoc-based lookup.
return getReferenceBases( GenomeLocParser.createGenomeLoc(read) );
|
||||||
// Old side (removed): clamp the alignment end to the contig length and pad the overhang with 'X'.
String contig = read.getReferenceName();
|
|
||||||
int start = read.getAlignmentStart();
|
|
||||||
int stop = read.getAlignmentEnd();
|
|
||||||
|
|
||||||
SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig);
|
|
||||||
if (stop > sequenceRecord.getSequenceLength())
|
|
||||||
stop = sequenceRecord.getSequenceLength();
|
|
||||||
|
|
||||||
ReferenceSequence alignmentToReference = reference.getSubsequenceAt(contig, start, stop);
|
|
||||||
return ( StringUtil.bytesToString(alignmentToReference.getBases()) + Utils.dupString('X', read.getAlignmentEnd() - stop) ).toCharArray();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,15 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 22, 2009
|
* Date: May 22, 2009
|
||||||
|
|
@ -46,4 +52,18 @@ public class ReferenceView implements View {
|
||||||
public void close() {
|
public void close() {
|
||||||
reference = null;
|
reference = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the reference bases for an arbitrary region of the reference.
|
||||||
|
* Positions of the locus that lie past the end of the contig are filled with 'X'.
|
||||||
|
* @param genomeLoc The locus whose bases are requested.
|
||||||
|
* @return A char array of the bases starting at the start of the locus (inclusive) and ending
|
||||||
|
* at the stop of the locus (inclusive), padded with 'X' where the contig has no bases.
|
||||||
|
*/
|
||||||
|
protected char[] getReferenceBases( GenomeLoc genomeLoc ) {
|
||||||
|
SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());
|
||||||
|
// Clamp the requested stop to the contig length so the subsequence lookup is not asked for bases past the end.
long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );
|
||||||
|
ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop);
|
||||||
|
// Append one 'X' for every requested position beyond the clamped stop.
return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ abstract public class BasicPileup implements Pileup {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static ArrayList<Byte> basePileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
ArrayList<Byte> bases = new ArrayList(reads.size());
|
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
@ -68,7 +68,7 @@ abstract public class BasicPileup implements Pileup {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static ArrayList<Byte> qualPileup( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
ArrayList<Byte> quals = new ArrayList(reads.size());
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
@ -79,7 +79,7 @@ abstract public class BasicPileup implements Pileup {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||||
ArrayList<Byte> quals = new ArrayList(reads.size());
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
byte qual = (byte)read.getMappingQuality();
|
byte qual = (byte)read.getMappingQuality();
|
||||||
|
|
@ -349,10 +349,10 @@ abstract public class BasicPileup implements Pileup {
|
||||||
|
|
||||||
public static byte consensusBase(String bases) {
|
public static byte consensusBase(String bases) {
|
||||||
String canon = bases.toUpperCase();
|
String canon = bases.toUpperCase();
|
||||||
int ACount = Utils.countOccurances('A', bases);
|
int ACount = Utils.countOccurrences('A', bases);
|
||||||
int CCount = Utils.countOccurances('C', bases);
|
int CCount = Utils.countOccurrences('C', bases);
|
||||||
int TCount = Utils.countOccurances('T', bases);
|
int TCount = Utils.countOccurrences('T', bases);
|
||||||
int GCount = Utils.countOccurances('G', bases);
|
int GCount = Utils.countOccurrences('G', bases);
|
||||||
|
|
||||||
int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount)));
|
int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount)));
|
||||||
if ( ACount == m ) return 'A';
|
if ( ACount == m ) return 'A';
|
||||||
|
|
|
||||||
|
|
@ -437,12 +437,11 @@ public class Utils {
|
||||||
|
|
||||||
// Returns a string made of the character c repeated nCopies times.
public static String dupString( char c, int nCopies ) {
|
public static String dupString( char c, int nCopies ) {
|
||||||
char[] chars = new char[nCopies];
|
char[] chars = new char[nCopies];
|
||||||
for ( int i = 0; i < nCopies; i++ ) chars[i] = c;
|
// New side: Arrays.fill takes the place of the hand-written fill loop.
Arrays.fill(chars,c);
|
||||||
//System.out.printf("chars is %s%n", new String(chars));
|
|
||||||
return new String(chars);
|
return new String(chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countOccurances(char c, String s) {
|
public static int countOccurrences(char c, String s) {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for ( int i = 0; i < s.length(); i++ ) {
|
for ( int i = 0; i < s.length(); i++ ) {
|
||||||
count += s.charAt(i) == c ? 1 : 0;
|
count += s.charAt(i) == c ? 1 : 0;
|
||||||
|
|
|
||||||
|
|
@ -247,7 +247,7 @@ public class DupUtils {
|
||||||
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
|
String bases = BasicPileup.basePileupAsString(duplicates, offsets);
|
||||||
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
ArrayList<Byte> quals = BasicPileup.qualPileup(duplicates, offsets);
|
||||||
byte combinedBase = BasicPileup.consensusBase(bases);
|
byte combinedBase = BasicPileup.consensusBase(bases);
|
||||||
int baseMatches = Utils.countOccurances((char)combinedBase, bases);
|
int baseMatches = Utils.countOccurrences((char)combinedBase, bases);
|
||||||
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
|
double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals));
|
||||||
double mismatchRate = (double)baseMatches / bases.length();
|
double mismatchRate = (double)baseMatches / bases.length();
|
||||||
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
|
byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for the general purpose utilities class; the cases here exercise Utils.dupString.
|
||||||
|
*
|
||||||
|
* @author hanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class UtilsTest {
|
||||||
|
|
||||||
|
// Zero copies must yield an empty string.
@Test
|
||||||
|
public void testDupStringNoChars() {
|
||||||
|
String duped = Utils.dupString('a',0);
|
||||||
|
Assert.assertEquals("dupString did not produce zero-length string", 0, duped.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
// A single copy must yield a one-character string holding the requested character.
@Test
|
||||||
|
public void testDupStringOneChar() {
|
||||||
|
String duped = Utils.dupString('b',1);
|
||||||
|
Assert.assertEquals("dupString did not produce single character string", 1, duped.length());
|
||||||
|
Assert.assertEquals("dupString character was incorrect", 'b', duped.charAt(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Five copies must yield the character repeated five times.
@Test
|
||||||
|
public void testDupStringMultiChar() {
|
||||||
|
String duped = Utils.dupString('c',5);
|
||||||
|
Assert.assertEquals("dupString did not produce five character string", 5, duped.length());
|
||||||
|
Assert.assertEquals("dupString string was incorrect", "ccccc", duped);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue