#!/bin/sh # Given one DNA sequence file as input, # this shell script runs several commands. # It BLASTs the sequence against RefSeq human, rat and mouse protein databases. # For each gene, it prints the number of species with a BLAST match. ######## 1. Check to make sure you get one argument (sequence file) if [ -z "$1" ] then echo " Usage: $0 seq" exit fi ######## 2. Check to make sure the sequence file really exists if [ ! -r "$1" ] then echo "Sequence file $1 doesn't appear to exist." exit 1 fi ####### 3. Place the three blast commands from step 7 of exercise 1 here echo "BLASTing $1 against RefSeq databases...." blastall -p blastx -e 1e-10 -m 8 -i $1 -d hs.faa -o hs_blast.txt blastall -p blastx -e 1e-10 -m 8 -i $1 -d rn.faa -o rn_blast.txt blastall -p blastx -e 1e-10 -m 8 -i $1 -d mouse.faa -o mm_blast.txt echo 'BLAST results are hs_blast.txt, rn_blast.txt, and mm_blast.txt.' ####### 4. Place the first three commands from step 10 of exercise 1 (listing the query sequences mapped to RefSeq proteins) here cut -f1 hs_blast.txt|sort -u > query_has_hs.txt cut -f1 rn_blast.txt|sort -u > query_has_rn.txt cut -f1 mm_blast.txt|sort -u > query_has_mm.txt ####### 5. Place the command from step 11 of exercise 1, *AND* redirect the output to a file named mapped_genes_summary.txt cat query_has_hs.txt query_has_rn.txt query_has_mm.txt|sort |uniq -c|sort -k1,1nr > mapped_genes_summary.txt echo 'For each transcript with a BLAST hit, see mapped_genes_summary.txt for the number of species with alignable proteins.' echo 'All Done.'