Disable input of known indels for use as alternate consensuses. Once rods are available in a read traversal, it will be trivial to hook them into the cleaner (the code is already there).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2825 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-02-11 15:52:21 +00:00
parent e491b42951
commit 96fee7cf7a
1 changed file with 9 additions and 40 deletions

View File

@ -44,9 +44,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="sortOnDisk", shortName="sortOnDisk", required=false, doc="Should we sort on disk instead of on the fly? This option is much slower but should be used when on-the-fly sorting fails because reads are too long [default:no]")
protected boolean SORT_ON_DISK = false;
@Argument(fullName="knownIndels", shortName="knownIndels", required=false, doc="One or more rod triplets <binding,type,path> of known indels to try for alternate consenses; types must implement VariationRod")
protected ArrayList<String> knownIndels = new ArrayList<String>();
// ADVANCED OPTIONS FOLLOW
@Argument(fullName="outputIndels", shortName="indels", required=false, doc="Output file (text) for the indels found")
@ -77,14 +74,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// the current interval in the list
private GenomeLoc currentInterval = null;
// the reads that fall into the current interval
// the reads and known indels that fall into the current interval
private ReadBin readsToClean = new ReadBin();
private ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
private TreeSet<VariationRod> knownIndelsToTry = new TreeSet<VariationRod>(new Comparator<VariationRod>(){
public int compare(VariationRod rod1, VariationRod rod2) {
return (int)(rod1.getLocation().getStart() - rod2.getLocation().getStart());
}
});
private ArrayList<VariationRod> knownIndelsToTry = new ArrayList<VariationRod>();
// the wrapper around the SAM writer
private Map<String, SAMFileWriter> writers = null;
@ -150,24 +143,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// set up the random generator
generator = new Random(RANDOM_SEED);
// set up the rods (since this is a ReadWalker we don't get rods from the traversal)
logger.info("Reading and parsing known indel rod files...");
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
ReferenceOrderedData.parseBindings(knownIndels, rods);
for ( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods ) {
if ( !VariationRod.class.isAssignableFrom(rod.getType()) )
continue;
SeekableRODIterator<? extends ReferenceOrderedDatum> iter = rod.iterator();
while ( iter.hasNext() ) {
RODRecordList<? extends ReferenceOrderedDatum> records = iter.next();
for ( ReferenceOrderedDatum record : records ) {
if ( ((VariationRod)record).isIndel() )
knownIndelsToTry.add((VariationRod)record);
}
}
}
logger.info("Finished reading and parsing known indel rod files");
if ( OUT_INDELS != null ) {
try {
indelOutput = new FileWriter(new File(OUT_INDELS));
@ -280,6 +255,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
else { // the read is past the current interval
clean(readsToClean);
knownIndelsToTry.clear();
// merge the two sets for emission
readsNotToClean.addAll(readsToClean.getReads());
@ -307,6 +283,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
public void onTraversalDone(Integer result) {
if ( readsToClean.size() > 0 || readsNotToClean.size() > 0 ) {
clean(readsToClean);
knownIndelsToTry.clear();
// merge the two sets for emission
readsNotToClean.addAll(readsToClean.getReads());
@ -393,19 +370,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
int totalMismatchSum = 0;
// if there are any known indels for this region, get them
while ( knownIndelsToTry.size() > 0 ) {
VariationRod knownIndel = knownIndelsToTry.first();
if ( knownIndel.getLocation().isBefore(readsToClean.getLocation()) ) {
knownIndelsToTry.remove(knownIndel);
} else if ( knownIndel.getLocation().overlapsP(readsToClean.getLocation()) ) {
knownIndelsToTry.remove(knownIndel);
String indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAlleleList().get(0) : Utils.dupString('-', knownIndel.getAlleleList().get(0).length());
Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion());
if ( c != null )
altConsenses.add(c);
} else {
break;
}
for ( VariationRod knownIndel : knownIndelsToTry ) {
String indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAlleleList().get(0) : Utils.dupString('-', knownIndel.getAlleleList().get(0).length());
Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion());
if ( c != null )
altConsenses.add(c);
}
// decide which reads potentially need to be cleaned