Perl OBO parser
Jump to navigation
Jump to search
Perl example: an OBO parser
The contents of this page have recently been imported from an older version of this Wiki. This page may contain outdated information, information that is irrelevant for this Wiki, information that needs to be differently structured, outdated syntax, and/or broken links. Use with caution!
This sample code parses an OBO-formatted file of GO terms and creates a data structure that stores relationship information for nodes.
Contents
This code is designed for the daily release of GO terms (download page). It was written for one particular purpose and has not been engineered to be fully and generally compliant with the OBO format specification.
#!/usr/bin/perl
# parseGOtree
# B. Steipe April 2008
#root node: molecular_function: GO:0003674
#root node: cellular_component: GO:0005575
#root node: biological_process: GO:0008150
use warnings;
use strict;
# ---- Settings ---------------------------------------------------------
# OBO release file that is read by the parser.
my $oboFile = "gene_ontology_2008_04_03.obo";
# Namespace whose terms are kept. The other two GO namespaces are
# "biological_process" and "molecular_function".
my $ontNS = "cellular_component";

# ---- Working storage --------------------------------------------------
my %GOtree;              # GO tree: for each GO ID its "name" and its list of "parent" IDs
my (@Term, @IsA, @Rel);  # lines of one Term; its is_a parents; its other relationships
my ($ID, $name);         # GO ID and name of the Term currently being handled
# Read the OBO file stanza by stanza and record every usable [Term] in
# %GOtree as
#     $GOtree{<GO ID>}{"name"}   - the term's name
#     $GOtree{<GO ID>}{"parent"} - array of is_a parent IDs, or a single 0
#                                  for a term that has no parent
# A term is skipped when it belongs to a namespace other than $ontNS, is
# obsolete, has no GO id, or has "relationship:" tags but no "is_a:" tag.
#
# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode or a pipe, and the handle is closed when done.
open (my $oboFH, '<', $oboFile) or die "Panic! Cannot open $oboFile: $!\n";
while (my $line = <$oboFH>) {
    # Only [Term] stanzas are of interest; every other line is skipped.
    next if $line !~ m/^\[Term\]/;

    # Collect the tag lines of this stanza; it ends at the first blank
    # line or at the end of the file.
    my @termLines;
    while (defined(my $tagLine = <$oboFH>)) {
        last if $tagLine =~ m/^\s*$/;
        $tagLine =~ s/\r?\n\z//;   # strip the line ending, including CRLF
        push (@termLines, $tagLine);
    }

    # All per-term state is declared here, so nothing (in particular the
    # name) can leak in from the previously processed term.
    my $use          = 1;
    my $termID       = "";
    my $termName     = "";
    my @parents;
    my $otherRelCount = 0;

    foreach my $record (@termLines) {
        if    ($record =~ m/^id: (GO:\d+)/)       { $termID = $1; }
        # Compare the namespace value as a string instead of interpolating
        # $ontNS into an unanchored pattern.
        elsif ($record =~ m/^namespace:\s*(\S*)/) { $use = 0 if $1 ne $ontNS; }
        elsif ($record =~ m/^is_obsolete: true/)  { $use = 0; }
        elsif ($record =~ m/^name: (.*)$/)        { $termName = $1; }
        elsif ($record =~ m/^is_a: (GO:\d+)/)     { push (@parents, $1); }
        elsif ($record =~ m/^relationship:/)      { $otherRelCount++; }
    }

    # This Term has only "other" relationships - ignore this Term
    $use = 0 if $otherRelCount && !@parents;
    next if !$use;

    # Without an id the term would be filed under the empty-string key.
    if ($termID eq "") {
        warn "Skipping [Term] stanza without a GO id (ends at line $. of $oboFile)\n";
        next;
    }

    $GOtree{$termID}{"name"} = $termName;
    if (!@parents) {
        # A single 0 marks a term that has no parent.
        $GOtree{$termID}{"parent"}[0] = 0;
    }
    else {
        push (@{ $GOtree{$termID}{"parent"} }, @parents);
    }
} # end processing file
close ($oboFH);
# Report how many terms were stored in %GOtree, then list every term that
# has no parent. Such a term carries 0 as the first element of its
# "parent" array.
print "All Terms processed: found ", scalar(keys(%GOtree)), " Terms in the file.\n";
# Keys are sorted so that the listing is identical from run to run; the
# loop variable is a fresh lexical rather than the file-scoped $ID.
foreach my $termID (sort keys %GOtree) {
    next if $GOtree{$termID}{"parent"}[0];
    print "GO term $termID (\"", $GOtree{$termID}{"name"}, "\") has no parent.\n";
}
exit();
Further reading and resources