#!/bin/perl -w # Do blast search and retrieve top 10 target sequences # See help at http://www.bioperl.org/HOWTOs/html/SearchIO.html for all data that can be extracted use Bio::SearchIO; # Prompt the user for the query file name if it's not an argument if (! $ARGV[0]) { print "What is the query file for the blast? "; # Get input and remove the newline character at the end chomp ($inFile =); } else { $inFile = $ARGV[0]; } # Prompt the user for the file of blast result if (! $ARGV[1]) { print "What is name of the blast output file? "; # Get input and remove the newline character at the end chomp ($blastFile = ); } else { $blastFile = $ARGV[1]; } # Prompt the user for the target sequence file if (! $ARGV[2]) { print "what is the name of blast targets file? "; # Get input and remove the newline character at the end chomp ($outFile = ); } else { $outFile = $ARGV[2]; } print "Wait, it's blasting your query...\n"; `blastall -p blastp -d /cluster/db0/Data/nr -i $inFile -o $blastFile`; #blast command $report = new Bio::SearchIO( -file=>"$blastFile", -format => "blast" ); open(FH, ">$outFile") || die "Can not write to $outFile\n"; #open file for save # Go through BLAST reports one by one while($result = $report->next_result) { # Count hits $i = 0; # Go through each each matching sequence while($hit = $result->next_hit) { $i++; #count the num of hits if ($i <= 10) { # the max number of hits $targetAcc = $hit->accession(); # retrieve the database accession print "PARSE $i HIT: $targetAcc\n"; # print out for debugging $fastaSeq = `fastacmd -d /cluster/db0/Data/nr -s $targetAcc`; # extract target sequences print FH $fastaSeq; # save the target sequence in $outFile } } } print "Done\n"; close (FH) || die "Can not close $outFile\n";