# genbank_parse.pl

#!/usr/local/bin/perl -w

# Extract sequence features from a GenBank report with BioPerl.
# Once the features are parsed, they can be printed in any format.
# WI Bioinformatics course - Feb 2002 - Lecture 6

use Bio::SeqIO;

# Formats: Fasta, EMBL, GenBank, Swissprot (swiss), PIR and GCG
use strict;
use warnings;

# Input report: the first command-line argument when one is given,
# otherwise the sample file this script has always read.
my $file = @ARGV ? $ARGV[0] : 'genbank_sample.txt';

# Open the report as a GenBank-format sequence stream.
my $seqin = Bio::SeqIO->new( -format => 'Genbank', -file => $file );

# Walk every sequence in the report and dump its features.
while ( my $seqobj = $seqin->next_seq() )
{
   print "Sequence: ", $seqobj->display_id, "\n\n";

   # get_all_SeqFeatures() is the current name for all_SeqFeatures().
   foreach my $feat ( $seqobj->get_all_SeqFeatures() )
   {
      # Header line for the feature: its type and its start/end positions.
      print $feat->primary_tag, " (from ", $feat->start, " to ",
         $feat->end, ")", "\n";

      # One indented line per tag; a tag can carry several values, so they
      # are joined with spaces.  get_all_tags()/get_tag_values() are the
      # current names for all_tags()/each_tag_value().
      foreach my $tag ( $feat->get_all_tags() )
      {
         print "\t", $tag, ": ", join( ' ', $feat->get_tag_values($tag) ), "\n";
      }

      # Flag features that carry a tag named 'new'.
      print "new feature\n" if $feat->has_tag('new');

      # features can have sub features
      # my @subfeat = $feat->get_SeqFeatures();
   }
}