gatk-3.8/java/lib/edu/mit/broad/cnv/util/GenomeBaseIndex.java

185 lines
5.5 KiB
Java

/*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever.
* Neither the Broad Institute nor MIT can be responsible for its use, misuse,
* or functionality.
*/
package edu.mit.broad.cnv.util;
import java.io.*;
import java.util.*;
/**
 * Utility class for transforming between a linear base index
 * and a chromosome + position coordinate system.
 *
 * <p>Sequences are laid end to end in the order in which they were read.
 * Linear base indexes are zero-based; positions within a sequence are one-based.
 * Instances are created with {@link #read(File)} or {@link #read(Reader)} and
 * do not change afterwards.
 */
public class GenomeBaseIndex {

    // Sequence names in input order, held as a read-only view so that callers
    // cannot desynchronize the names from the parallel arrays below.
    private final List<String> mSequenceNames;
    // Length of each sequence, parallel to mSequenceNames (always > 0).
    private final int[] mLengths;
    // Linear index of the first base of each sequence (strictly increasing).
    private final long[] mOffsets;
    // Sum of all sequence lengths, i.e. one past the largest valid base index.
    private final long mTotalLength;

    private GenomeBaseIndex(List<String> sequenceNames, int[] lengths, long[] offsets, long totalLength) {
        mSequenceNames = Collections.unmodifiableList(sequenceNames);
        mLengths = lengths;
        mOffsets = offsets;
        mTotalLength = totalLength;
    }

    /**
     * Reads an index from a file; see {@link #read(Reader)} for the format.
     * The file is closed before this method returns.
     *
     * @param file the file to read
     * @return the index described by the file
     * @throws IOException if the file cannot be opened or read
     */
    public static GenomeBaseIndex read(File file)
        throws IOException {
        Reader reader = new BufferedReader(new FileReader(file));
        try {
            return read(reader);
        } finally {
            reader.close();
        }
    }

    /**
     * Reads an index from a reader. The reader is consumed to end of input
     * but is not closed.
     *
     * <p>Each input line holds a sequence name and a sequence length separated
     * by whitespace; any further fields on the line are ignored. Blank lines
     * and lines whose first non-blank character is {@code #} are skipped.
     *
     * @param reader the source of the index text
     * @return the index described by the input
     * @throws IOException if reading fails
     * @throws NumberFormatException if a length field is not a valid int
     * @throws RuntimeException if a line has fewer than two fields or a
     *         length is not positive
     */
    public static GenomeBaseIndex read(Reader reader)
        throws IOException {
        List<String> sequenceNames = new ArrayList<String>();
        List<Integer> sequenceLengths = new ArrayList<Integer>();
        // Avoid stacking a second buffer on a reader that is already buffered.
        BufferedReader bufferedReader = (reader instanceof BufferedReader)
            ? (BufferedReader) reader
            : new BufferedReader(reader);
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            String text = line.trim();
            if (text.length() == 0 || text.startsWith("#")) {
                continue;
            }
            String[] fields = text.split("\\s+");
            if (fields.length < 2) {
                throw new RuntimeException("Invalid input line: " + line);
            }
            int length = parseLength(fields[1], line);
            sequenceNames.add(fields[0]);
            sequenceLengths.add(length);
        }
        int count = sequenceLengths.size();
        int[] lengths = new int[count];
        long[] offsets = new long[count];
        long offset = 0;
        for (int i = 0; i < count; i++) {
            lengths[i] = sequenceLengths.get(i);
            offsets[i] = offset;
            offset += lengths[i];
        }
        return new GenomeBaseIndex(sequenceNames, lengths, offsets, offset);
    }

    // Parses and validates one length field, reporting the offending line on failure.
    private static int parseLength(String field, String line) {
        int length;
        try {
            length = Integer.parseInt(field);
        } catch (NumberFormatException e) {
            // Same exception type as before, but with enough context to locate the bad line.
            NumberFormatException detailed =
                new NumberFormatException("Invalid sequence length in input line: " + line);
            detailed.initCause(e);
            throw detailed;
        }
        if (length <= 0) {
            throw new RuntimeException("Invalid sequence length: " + length);
        }
        return length;
    }

    /**
     * Returns the sequence names in input order.
     *
     * @return a read-only list; attempts to modify it throw
     *         {@link UnsupportedOperationException}
     */
    public List<String> getSequenceNames() {
        return mSequenceNames;
    }

    /**
     * Tests whether a sequence with the given name is present.
     *
     * @param seqName the sequence name to look up
     * @return true if the name is known
     */
    public boolean contains(String seqName) {
        return (getSequenceIndex(seqName) >= 0);
    }

    /**
     * Returns the linear index of the first base of a sequence.
     *
     * @param seqName the sequence name to look up
     * @return the zero-based linear index, or -1 if the name is unknown
     */
    public long getFirstIndex(String seqName) {
        int index = getSequenceIndex(seqName);
        if (index < 0) {
            return -1;
        }
        return mOffsets[index];
    }

    /**
     * Returns the linear index of the last base of a sequence.
     *
     * @param seqName the sequence name to look up
     * @return the zero-based linear index, or -1 if the name is unknown
     */
    public long getLastIndex(String seqName) {
        int index = getSequenceIndex(seqName);
        if (index < 0) {
            return -1;
        }
        return (mOffsets[index] + mLengths[index] - 1);
    }

    /**
     * Returns the length of a sequence.
     *
     * @param seqName the sequence name to look up
     * @return the length in bases, or 0 if the name is unknown
     */
    public int getSequenceLength(String seqName) {
        int index = getSequenceIndex(seqName);
        if (index < 0) {
            return 0;
        }
        return mLengths[index];
    }

    /**
     * Converts a sequence name and one-based position to a linear base index.
     *
     * @param seqName the sequence name to look up
     * @param position the one-based position; zero or a negative value
     *        selects the last base of the sequence
     * @return the zero-based linear index, or -1 if the name is unknown or
     *         the position is beyond the end of the sequence
     */
    public long getBaseIndex(String seqName, int position) {
        int index = getSequenceIndex(seqName);
        if (index < 0) {
            return -1;
        }
        if (position > mLengths[index]) {
            return -1;
        }
        if (position < 1) {
            // Zero or negative position means last base index
            position = mLengths[index];
        }
        return (mOffsets[index] + position - 1);
    }

    /**
     * Returns the name of the sequence containing a linear base index.
     *
     * @param baseIndex the zero-based linear index
     * @return the sequence name, or null if the index is negative or past
     *         the end of the last sequence
     */
    public String getSequenceName(long baseIndex) {
        int index = getSequenceIndex(baseIndex);
        if (index < 0) {
            return null;
        }
        return mSequenceNames.get(index);
    }

    /**
     * Returns the one-based position, within its sequence, of a linear base index.
     *
     * @param baseIndex the zero-based linear index
     * @return the one-based position, or 0 if the index is past the end of
     *         the last sequence
     * @throws IllegalArgumentException if {@code baseIndex} is negative
     */
    public int getPosition(long baseIndex) {
        if (baseIndex < 0) {
            // Catch common sign-extension error when packing indexes as ints.
            throw new IllegalArgumentException("Invalid base index: " + baseIndex);
        }
        int index = getSequenceIndex(baseIndex);
        if (index < 0) {
            return 0;
        }
        long offset = mOffsets[index];
        long result = baseIndex - offset + 1;
        // Safe narrowing: the result never exceeds the sequence length, which is an int.
        return (int) result;
    }

    /**
     * Same as {@link #getSequenceName(long)}, but treats the argument as an
     * unsigned int. This is useful for manipulating/storing indexes for the
     * human genome as 4-byte unsigned ints.
     *
     * @param baseIndex the linear index, interpreted as unsigned 32-bit
     * @return the sequence name, or null if the index is past the end
     */
    public String getSequenceNameUnsigned(int baseIndex) {
        return getSequenceName(baseIndex & 0xFFFFFFFFL);
    }

    /**
     * Same as {@link #getPosition(long)}, but treats the argument as an
     * unsigned int. This is useful for manipulating/storing indexes for the
     * human genome as 4-byte unsigned ints.
     *
     * @param baseIndex the linear index, interpreted as unsigned 32-bit
     * @return the one-based position, or 0 if the index is past the end
     */
    public int getPositionUnsigned(int baseIndex) {
        return getPosition(baseIndex & 0xFFFFFFFFL);
    }

    // Returns the slot of the first sequence with this name, or -1 if there is none.
    private int getSequenceIndex(String seqName) {
        return mSequenceNames.indexOf(seqName);
    }

    // Returns the slot of the sequence containing baseIndex, or -1 if out of range.
    private int getSequenceIndex(long baseIndex) {
        if (baseIndex < 0 || baseIndex >= mTotalLength) {
            return -1;
        }
        // Offsets are strictly increasing because every length is positive.
        int found = Arrays.binarySearch(mOffsets, baseIndex);
        if (found >= 0) {
            // Exact hit: baseIndex is the first base of that sequence.
            return found;
        }
        // Miss: binarySearch returned -(insertionPoint) - 1, and the containing
        // sequence is the one immediately before the insertion point.
        return -found - 2;
    }
}