Hidden mode in ValidationAmplicons to support ILMN output format (same as Sequenom, with just shuffled columns)

This commit is contained in:
Guillermo del Angel 2012-03-08 12:29:44 -05:00
parent 3b5a7c34d7
commit 858acf8616
1 changed files with 19 additions and 3 deletions

View File

@ -134,6 +134,10 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
boolean sequenomOutput = false;
@Hidden
@Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false)
boolean ilmnOutput = false;
GenomeLoc prevInterval;
GenomeLoc allelePos;
@ -141,6 +145,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
StringBuilder sequence;
StringBuilder rawSequence;
boolean sequenceInvalid;
boolean isSiteSNP;
List<String> invReason;
int indelCounter;
@ -169,6 +174,9 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
header.setSequenceDictionary(referenceDictionary);
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
}
if (ilmnOutput)
out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I");
}
public Integer reduceInit() {
@ -234,6 +242,8 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
}
rawSequence.append(Character.toUpperCase((char) ref.getBase()));
} else if ( validate != null ) {
// record variant type in case it's needed in output format
isSiteSNP = (validate.isSNP());
// doesn't matter if there's a mask here too -- this is what we want to validate
if ( validate.isFiltered() ) {
logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site.");
@ -496,13 +506,19 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
if (!sequenomOutput)
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
else {
if (sequenomOutput) {
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
probeName = probeName.replace("amplicon_","a");
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
}
else if (ilmnOutput) {
String type = isSiteSNP?"SNP":"INDEL";
seqIdentity = seqIdentity.replace("*",""); // no * in ref allele
out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart());
}
else{
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
}
}
}
}