Alter PlinkToVCF to parse .ped file headers much more flexibly: a header may contain any of several standard fields, and those fields may appear in different orders.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2650 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-21 18:02:28 +00:00
parent 5b2a1e483e
commit f51cffe220
1 changed files with 4 additions and 2 deletions

View File

@ -40,6 +40,7 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
/**
 * Command-line argument: maximum homozygous-nonreference rate (as a proportion)
 * for an assay to be considered valid.
 * NOTE(review): the default of 1.1 is above 1.0, so presumably this filter is
 * effectively off unless the user sets it -- confirm against the code that reads it.
 */
@Argument(fullName="maxHomNonref", doc="Maximum homozygous-nonreference rate (as a proportion) to consider an assay valid", required = false)
public double maxHomNonref = 1.1;
// Names of the non-SNP columns that may appear in a .ped header. The header parser
// treats any tab-separated header entry that is NOT in this set as a SNP column.
// The first name carries a literal leading '#' ("#Family ID"); presumably the header
// line itself starts with '#' -- TODO confirm against real input files.
private final Set<String> HEADER_FIELDS = new HashSet<String>(Arrays.asList("#Family ID","Individual ID","Sex","Paternal ID","Maternal ID","Phenotype"));
// NOTE(review): usage is not visible in this excerpt; by its name, presumably an
// initial capacity for a per-population collection -- confirm.
private final int INIT_NUMBER_OF_POPULATIONS = 10;
// NOTE(review): usage is not visible in this excerpt; presumably the quality value
// assigned to emitted records when none is available -- confirm.
private final int DEFAULT_QUALITY = 20;
// Map from a String key to a SequenomVariantInfo. What the key represents is not
// shown in this excerpt.
private HashMap<String, SequenomVariantInfo> sequenomResults = new HashMap<String,SequenomVariantInfo>();
@ -268,7 +269,8 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
String[] fields = header.split("\t");
int fieldOffset = 0;
for ( String entry : fields ) {
if ( fieldOffset > 5 ) {
if ( ! HEADER_FIELDS.contains(entry) ) {
//System.out.println(entry);
// actually a SNP
String snpName = entry.split("\\|")[1];
//System.out.println("Entry: "+entry+" Name: "+snpName);
@ -294,7 +296,7 @@ public class PlinkToVCF extends RefWalker<VCFRecord,Integer> {
}
sampleNames.add(entries[1]);
for ( String entry : entries ) {
if ( offset > 5 ) { // actual SNP
if ( variants.containsKey(offset) ) { // actual SNP
variants.get(offset).addGenotype(entry);
//System.out.println("Added: "+entry+"To "+offset);
}