-
Notifications
You must be signed in to change notification settings - Fork 1
/
parse_morbidmap.pl
executable file
·147 lines (96 loc) · 3.3 KB
/
parse_morbidmap.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env perl
#
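# Description:
#   Parse the OMIM morbidmap file into a tab-delimited table. Each output row
#   carries the original phenotype (disorder) field, the phenotype MIM number
#   and phenotype mapping key extracted from it, the locus symbols, the gene
#   MIM number, the cytogenetic location, and an isComplex flag
#   (no / yes / somatic / cancer).
#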
# Created by Jessica Chong on 2015-02-22.
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
# Command-line handling.
#   --in   path of the morbidmap file to read   (required)
#   --out  path of the table to write           (required)
#   --help / -?  print the usage text
# pod2usage() terminates the program itself, so every check below is a
# stand-alone guard.
my ($inputfile, $outputfile, $help);

my $options_ok = GetOptions(
    'in=s'   => \$inputfile,
    'out=s'  => \$outputfile,
    'help|?' => \$help,
);

# Unknown or malformed options: show usage and exit.
unless ($options_ok) {
    pod2usage(-verbose => 1);
}

# Explicit request for help.
if ($help) {
    pod2usage(-verbose => 1, -exitval => 1);
}

# Both file arguments are mandatory; --in is checked first.
unless (defined $inputfile) {
    pod2usage(-message => "$0: --in not defined.\n", -verbose => 1, -exitval => 2);
}
unless (defined $outputfile) {
    pod2usage(-message => "$0: --out not defined\n", -verbose => 1, -exitval => 2);
}
# For the file morbidmap, the fields are, in order:
# 1 - Disorder, <disorder MIM no.> (<phene mapping key>)
# 2 - Gene/locus symbols
# 3 - Gene/locus MIM no.
# 4 - cytogenetic location
# Convert every morbidmap record into one tab-delimited output row.
#
# For each non-comment input line the combined phenotype field is kept as-is
# and additionally mined for the phenotype MIM number (first run of six
# digits) and the phenotype mapping key (single digit in parentheses at the
# end). Each row is then tagged in the isComplex column with one of
# no / yes / somatic / cancer.
#
# The input is opened first, read-only and with 3-arg open, so that a missing
# or unreadable input file does not leave a header-only output file behind and
# so that special characters in the file name are never interpreted as an
# open mode.
open(my $input_handle, '<', $inputfile)
    or die "Cannot read $inputfile: $!.\n";
open(my $output_handle, '>', $outputfile)
    or die "Cannot write to $outputfile: $!.\n";

print {$output_handle} "phenoname\tphenoMIMnum\tphenoMappingKey\tLocusSymbols\tGeneMIMnum\tCytoLoc\tisComplex\n";

LINE:
while (my $line = <$input_handle>) {
    $line =~ s/\s+$//;              # Remove line endings and trailing whitespace
    next LINE if $line eq '';       # Nothing to parse on a blank line
    next LINE if $line =~ /^#/;     # Comment / header lines

    # Pad short records so that missing trailing columns are written as empty
    # strings instead of triggering "uninitialized value" warnings.
    my @fields = split /\t/, $line;
    push @fields, '' while @fields < 4;
    my ($phenoname, $locussymbol, $locusMIM, $cytoloc) = @fields;

    # Defaults used when the phenotype field carries no MIM number / key.
    my ($phenoMIM, $phenomappingkey) = qw(NA -9);
    if ($phenoname =~ m/(\d{6})/) {
        $phenoMIM = $1;
    }
    if ($phenoname =~ m/\((\d)\)$/) {
        $phenomappingkey = $1;
    }

    # if a chromosomal del/dup syndrome (mapping key 4), the gene MIM
    # *is usually* the phenotype MIM
    if ($phenomappingkey == 4 && $phenoMIM eq 'NA') {
        $phenoMIM = $locusMIM;
    }

    # Flag (do not drop) the following phenotypes as complex:
    #   QTL or quantitative trait locus
    #   suscep* to (susceptibility is misspelled in a few OMIM entries)
    #   risk
    #   names containing [ or {
    my $isComplex = "no";
    if (   $phenoname =~ m/risk/
        || $phenoname =~ m/QUANTITATIVE TRAIT LOCUS/i
        || $phenoname =~ m/QTL/i
        || $phenoname =~ m/\[/
        || $phenoname =~ m/\{/
        || $phenoname =~ m/suscep(\w+) to/)
    {
        $isComplex = "yes";
    }

    # Somatic phenotypes are kept but flagged: "cancer" when the name also
    # looks like a tumour type (this overrides "yes"), otherwise "somatic"
    # unless the row was already marked complex.
    if ($phenoname =~ m/somatic/) {
        if ($phenoname =~ /carcinoma|cancer|tumor|leukemia|lymphoma|sarcoma|blastoma|adenoma|cytoma|myelodysplastic|Myelofibrosis|oma,/i) {
            $isComplex = "cancer";
        }
        elsif ($isComplex ne 'yes') {
            $isComplex = "somatic";
        }
    }

    print {$output_handle} "$phenoname\t$phenoMIM\t$phenomappingkey\t$locussymbol\t$locusMIM\t$cytoloc\t$isComplex\n";
}

close $input_handle;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $output_handle or die "Cannot close $outputfile: $!.\n";
################################################################################################################
############################################ Documentation #####################################################
################################################################################################################
=head1 NAME
parse_morbidmap.pl - Parse morbidmap into a more useful form!
=head1 SYNOPSIS
perl B<parse_morbidmap.pl> B<--in> F<input file> B<--out> F<output file> I<[options]>
=head1 ARGUMENTS
=over 4
=item B<--in> F<input file>
input file
=item B<--out> F<output file>
name of output file
=item B<--help> I<help>
print documentation
=back
=head1 FILES
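Input: the OMIM morbidmap file (tab-delimited; lines starting with # are skipped). Output: a tab-delimited table with one header line.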
=head1 EXAMPLES
perl parse_morbidmap.pl --in morbidmap.txt --out morbidmap.parsed.txt
=head1 AUTHOR
Jessica Chong ([email protected], [email protected])
=cut