Perl OBO parser

Perl example: an OBO parser

The contents of this page have recently been imported from an older version of this Wiki. This page may contain outdated information, information that is irrelevant for this Wiki, information that needs to be differently structured, outdated syntax, and/or broken links. Use with caution!

This sample code parses an OBO-formatted file of GO terms and creates a data structure that stores relationship information for nodes.

#!/usr/bin/perl
# parseGOtree
# B. Steipe April 2008

#root node: molecular_function: GO:0003674
#root node: cellular_component: GO:0005575
#root node: biological_process: GO:0008150

use warnings;
use strict;

# Parameters
# Path of the OBO file to read.
my $oboFile = "gene_ontology_2008_04_03.obo";
# Ontology namespace whose Terms are kept; Terms from any other namespace are
# skipped. Leave exactly one of the three assignments below uncommented.
#my $ontNS = "biological_process";
my $ontNS = "cellular_component";
#my $ontNS = "molecular_function";

# Declarations
# These are file-level variables that the parsing loop re-initializes for
# every [Term] stanza.
my %GOtree;  # the GO tree: GO ID => { "name" => term name, "parent" => [ parent GO IDs ] };
             # a Term without an is_a parent gets "parent" => [ 0 ]
my @Term;    # raw lines of the one Term stanza currently being processed
my @IsA;     # parent GO IDs taken from the "is_a:" lines of one Term
my @Rel;     # complete "relationship:" lines of one Term (other relationships)
my $ID;      # current GO ID; also reused as the loop variable of the final report
my $name;    # name of current GO ID

# Open the OBO file for reading. The three-argument form with a lexical
# handle stops characters such as '>' or '|' in the file name from being
# interpreted as an open mode, and keeps the handle out of the global
# namespace.
open(my $obo_fh, '<', $oboFile) or die "Panic! $!\n";

# Scan the file for [Term] stanzas. A stanza is the run of non-blank lines
# following a "[Term]" header; it ends at the first blank line or at EOF.
# Every accepted Term is stored in %GOtree under its GO ID with its name and
# the list of its is_a parents.
while (my $line = <$obo_fh>) {

    next if $line !~ m/^\[Term\]/;

    @Term = (); # Initialize
    while ($line = <$obo_fh>) {
        last if $line =~ m/^\s*$/;
        push(@Term, $line);
    }
    # done reading Term, process it

    my $use = 1;   # cleared as soon as this Term turns out to be unwanted
    @IsA  = ();
    @Rel  = ();
    $ID   = "";
    $name = "";    # reset per Term so that a stanza without a "name:" tag
                   # cannot inherit the name of the previous Term

    foreach my $record (@Term) {
        chomp($record);
        if ($record =~ m/^id: (GO:\d+)/) {
            $ID = $1;
        }
        elsif ($record =~ m/^namespace:\s*(\S*)/) {
            # Compare the namespace value as an exact string. Interpolating
            # $ontNS into an unanchored pattern would also accept substring
            # matches anywhere on the line (e.g. in a trailing comment).
            $use = 0 if $1 ne $ontNS;
        }
        elsif ($record =~ m/^is_obsolete: true/) {
            $use = 0;
        }
        elsif ($record =~ m/^name: (.*)$/) {
            $name = $1;
        }
        elsif ($record =~ m/^is_a: (GO:\d+)/) {
            push(@IsA, $1);
        }
        elsif ($record =~ m/^relationship:/) {
            push(@Rel, $record);
        }
    }

    # This Term has only "other" relationships - ignore this Term
    $use = 0 if scalar(@Rel) && !scalar(@IsA);

    # A stanza without a recognizable GO ID would be filed under the
    # empty-string key and pollute the tree, so it is skipped.
    $use = 0 if $ID eq "";

    next if !$use;

    $GOtree{$ID}{"name"} = $name;
    if (scalar(@IsA)) {
        push(@{ $GOtree{$ID}{"parent"} }, @IsA);
    }
    else {
        # A single 0 entry marks a Term that has no is_a parent.
        $GOtree{$ID}{"parent"}[0] = 0;
    }
} # end processing file

close($obo_fh);

# Summary: number of Terms that were accepted into %GOtree.
print "All Terms processed: found ", scalar(keys(%GOtree)), " Terms in the file.\n";

# List every Term whose first "parent" entry is false, i.e. the 0 marker that
# is stored for Terms without an is_a parent. The IDs are sorted so that the
# report comes out in the same order on every run; hash iteration order is
# not stable. No lexical named $a is declared here, because that would shadow
# the package variable sort relies on.
foreach my $go_id (sort keys %GOtree) {
    next if $GOtree{$go_id}{"parent"}[0];
    print "GO term $go_id (\"", $GOtree{$go_id}{"name"}, "\") has no parent.\n";
}


exit();

Perl OBO parser

Contents

Further reading and resources

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Sections

Tools