diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index 27b40a63b..69c50f2cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -70,17 +70,12 @@ public class LocusReferenceView extends ReferenceView { /** * Allow the user to pull reference info from any arbitrary region of the reference. - * Assume the user has already performed all necessary bounds checking. - * TODO: This function is nearly identical to that in the ReadReferenceView. Merge the common functionality. * @param genomeLoc The locus. * @return A list of the bases starting at the start of the locus (inclusive) and ending * at the end of the locus (inclusive). */ public char[] getReferenceBases( GenomeLoc genomeLoc ) { - SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig()); - long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() ); - ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop); - return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray(); + return super.getReferenceBases(genomeLoc); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index 4aa6cc340..74e5326ed 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -5,6 +5,8 @@ import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.StringUtil; import net.sf.picard.reference.ReferenceSequence; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; /* * Copyright (c) 2009 The Broad Institute * @@ -62,17 +64,7 @@ public class ReadReferenceView extends ReferenceView { public char[] getReferenceBases( SAMRecord read ) { if (read.getReadUnmappedFlag()) return null; - - String contig = read.getReferenceName(); - int start = read.getAlignmentStart(); - int stop = read.getAlignmentEnd(); - - SAMSequenceRecord sequenceRecord = reference.getSequenceDictionary().getSequence(contig); - if (stop > sequenceRecord.getSequenceLength()) - stop = sequenceRecord.getSequenceLength(); - - ReferenceSequence alignmentToReference = reference.getSubsequenceAt(contig, start, stop); - return ( StringUtil.bytesToString(alignmentToReference.getBases()) + Utils.dupString('X', read.getAlignmentEnd() - stop) ).toCharArray(); + return getReferenceBases( GenomeLocParser.createGenomeLoc(read) ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index 367dd204c..d75e23d61 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -1,9 +1,15 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import java.util.Collections; import java.util.Collection; + +import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; +import net.sf.picard.reference.ReferenceSequence; /** * User: hanna * Date: May 22, 2009 @@ -46,4 +52,18 @@ public class ReferenceView implements View { public void close() { reference = null; } + + /** + * Allow the user to pull reference info from any arbitrary region of the reference. + * If parts of the reference don't exist, mark them in the char array with 'X'es. + * @param genomeLoc The locus. + * @return A list of the bases starting at the start of the locus (inclusive) and ending + * at the end of the locus (inclusive). + */ + protected char[] getReferenceBases( GenomeLoc genomeLoc ) { + SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig()); + long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() ); + ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(),genomeLoc.getStart(),stop); + return (StringUtil.bytesToString(subsequence.getBases()) + Utils.dupString('X', (int)(genomeLoc.getStop() - stop)) ).toCharArray(); + } } diff --git a/java/src/org/broadinstitute/sting/utils/BasicPileup.java b/java/src/org/broadinstitute/sting/utils/BasicPileup.java index 55a12b981..2d3d92cc0 100755 --- a/java/src/org/broadinstitute/sting/utils/BasicPileup.java +++ b/java/src/org/broadinstitute/sting/utils/BasicPileup.java @@ -58,7 +58,7 @@ abstract public class BasicPileup implements Pileup { } public static ArrayList basePileup( List reads, List offsets ) { - ArrayList bases = new ArrayList(reads.size()); + ArrayList bases = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); @@ -68,7 +68,7 @@ abstract public class BasicPileup implements Pileup { } public static ArrayList qualPileup( List reads, List offsets ) { - ArrayList quals = new ArrayList(reads.size()); + ArrayList quals = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); @@ -79,7 +79,7 @@ abstract public class BasicPileup implements Pileup { } public static ArrayList mappingQualPileup( List reads) { - ArrayList quals = new ArrayList(reads.size()); + ArrayList quals = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); byte qual = (byte)read.getMappingQuality(); @@ -349,10 +349,10 @@ abstract public class BasicPileup implements Pileup { public static byte consensusBase(String bases) { String canon = bases.toUpperCase(); - int ACount = Utils.countOccurances('A', bases); - int CCount = Utils.countOccurances('C', bases); - int TCount = Utils.countOccurances('T', bases); - int GCount = Utils.countOccurances('G', bases); + int ACount = Utils.countOccurrences('A', bases); + int CCount = Utils.countOccurrences('C', bases); + int TCount = Utils.countOccurrences('T', bases); + int GCount = Utils.countOccurrences('G', bases); int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount))); if ( ACount == m ) return 'A'; diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index ea7463e20..a924e4d3c 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -437,12 +437,11 @@ public class Utils { public static String dupString( char c, int nCopies ) { char[] chars = new char[nCopies]; - for ( int i = 0; i < nCopies; i++ ) chars[i] = c; - //System.out.printf("chars is %s%n", new String(chars)); + Arrays.fill(chars,c); return new String(chars); } - public static int countOccurances(char c, String s) { + public static int countOccurrences(char c, String s) { int count = 0; for ( int i = 0; i < s.length(); i++ ) { count += s.charAt(i) == c ? 1 : 0; diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 925c6c8f6..0358ba457 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -247,7 +247,7 @@ public class DupUtils { String bases = BasicPileup.basePileupAsString(duplicates, offsets); ArrayList quals = BasicPileup.qualPileup(duplicates, offsets); byte combinedBase = BasicPileup.consensusBase(bases); - int baseMatches = Utils.countOccurances((char)combinedBase, bases); + int baseMatches = Utils.countOccurrences((char)combinedBase, bases); double maxP = QualityUtils.qualToProb(Utils.listMaxByte(quals)); double mismatchRate = (double)baseMatches / bases.length(); byte combinedQual = QualityUtils.probToQual(Math.min(maxP, mismatchRate), 0.0); diff --git a/java/test/org/broadinstitute/sting/utils/UtilsTest.java b/java/test/org/broadinstitute/sting/utils/UtilsTest.java new file mode 100644 index 000000000..e0cd2bdca --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/UtilsTest.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + +import org.junit.Test; +import junit.framework.Assert; + +/** + * Testing framework for general purpose utilities class. + * + * @author hanna + * @version 0.1 + */ + +public class UtilsTest { + + @Test + public void testDupStringNoChars() { + String duped = Utils.dupString('a',0); + Assert.assertEquals("dupString did not produce zero-length string", 0, duped.length()); + } + + @Test + public void testDupStringOneChar() { + String duped = Utils.dupString('b',1); + Assert.assertEquals("dupString did not produce single character string", 1, duped.length()); + Assert.assertEquals("dupString character was incorrect", 'b', duped.charAt(0)); + } + + @Test + public void testDupStringMultiChar() { + String duped = Utils.dupString('c',5); + Assert.assertEquals("dupString did not produce five character string", 5, duped.length()); + Assert.assertEquals("dupString string was incorrect", "ccccc", duped); + } + +}