#!/bin/sh -

# This script will send a file to netscape with database IDs linked to SRS.
# To customize this script edit the $PROTEIN_DATABASES, $DNA_DATABASES and
# $SRS_SERVER variables in the perl code below and the NETSCAPE variable at
# the top of the script.

# To enable this in Artemis, sanger_options must be set to true in the options 
# file.

# Browser executable used to display the generated page.  Defined before the
# environment file is read, so that file is able to override it.
NETSCAPE=/usr/bin/X11/real-netscape

# Pull in site-specific settings when an environment file has been named.
# The path is quoted so that a name containing whitespace is still sourced
# as a single file.
if [ -f "$DIANA_ENVIRONMENT_FILE" ]
then
   . "$DIANA_ENVIRONMENT_FILE"
fi

# At least one argument is required; complain on stderr and give up when the
# script is called without any.
[ "$#" -gt 0 ] || {
    echo no argument given 1>&2
    exit 1
}

# The results file to convert is the first argument.
file_arg=$1

# Process id plus host name, used to keep the generated file name distinct
# from those of other runs.
unique_bit=$$.$(hostname)

# Work out where the generated HTML page goes: next to the input file, with
# the unique suffix and ".html" appended.  The test operand is quoted so that
# a file name containing spaces is still recognised as living in the current
# directory.
if [ -f "./$file_arg" ]
then
    # the file is in the current directory - we need the full path so netscape
    # can find the file
    new_file="$PWD/$file_arg.$unique_bit.html"
else
    new_file="$file_arg.$unique_bit.html"
fi

# Start the HTML page (this truncates any existing file of the same name).
# The body is opened with <PRE> so the results keep their fixed-width layout.
# The target is quoted so paths with spaces work, and the script stops here
# if the page cannot be created - the shell has already reported why.
cat <<EOF > "$new_file" || exit 1
<HTML>
 <HEAD>
  <TITLE>
  Results for $file_arg
 </TITLE>
 </HEAD>
 <BODY>
<PRE>
EOF

# Convert the search results in $file_arg to HTML text and append it to
# $new_file.  The perl program copies the header lines through unchanged,
# links every ID in the hit summary both to its alignment further down the
# page and to the SRS entry, and turns the first later occurrence of each ID
# into the named anchor that the summary links point at.
#
# NOTE: the program is handed to perl inside single quotes, so it must never
# contain a single quote character - not even in a comment.
perl -e '
BEGIN {
  # change these variables to list the databases to search for the IDs - the
  # database names should be separated by spaces
  $PROTEIN_DATABASES = "swall";
  $DNA_DATABASES = "embl";

  # change this to point to the wgetz script of your SRS server
  $SRS_SERVER = "www.sanger.ac.uk/srs6bin/cgi-bin/wgetz?-e+";

  # spaces between database names are URL-encoded for use in the links
  $PROTEIN_DATABASES =~ s/ /%20/g;
  $DNA_DATABASES =~ s/ /%20/g;

  # regular expressions matching the line that ends the header section of
  # the results; everything up to and including that line is copied as-is
  $BLAST_START_LINE = "Sequences producing High-scoring Segment Pairs|" . 
    "Sequences producing significant alignments:";
  $FASTA_START_LINE = "The best scores are";

  # the list of IDs we have seen so far
  @ids = ();

  # the list of IDs we have made anchors for so far
  @anchored_ids = ();

  # set to "dna" or "protein" once the first line of the input has been read
  $db_type = "unknown";
}

# Return a link from the hit summary to the anchor for the given ID that is
# created further down the same page.
sub hyperlink_to_anchor
{
  $id = shift;
  qq(<a href="#$id">$id</a>);
}

# Return a link to the SRS entry for the given ID.  The DNA databases are
# queried when $db_type is "dna"; any other value (including "unknown")
# queries the protein databases.  The backslash before each opening square
# bracket keeps perl from reading the bracket as an array subscript.
sub hyperlink_id
{
  $id = shift;

  if ($db_type eq "dna") {
    $r = qq#<a href="http://$SRS_SERVER\[\{$DNA_DATABASES\}-ID:$id*]|[\{$DNA_DATABASES\}-AccNumber:$id*]">$id</a>#;
  } else {
    $r = qq#<a href="http://$SRS_SERVER\[\{$PROTEIN_DATABASES\}-ID:$id*]|[\{$PROTEIN_DATABASES\}-AccNumber:$id*]">$id</a>#;

    # special case for tremblnew entries - check tremblnew and if the entry
    # is not there, get the entry from TREMBL via the dbxref
    if ($id =~ /[a-z][a-z][a-z]\d\d\d\d\d/i) {

      system "efetch trnew:$id > /dev/null 2> /dev/null";

      if ($?) {
        # it failed so try using dbxref

        $r =
        qq#<a href="http://$SRS_SERVER\[{$PROTEIN_DATABASES}-dbxref:$id*]">$id</a>#;
      }
    }
  }
  return $r
}

$file_name = $ARGV[0];

# Open the results, decompressing names that end in .gz.  gzip is started
# with an argument list rather than a shell command line, and the plain file
# is opened with an explicit read mode, so spaces and shell or mode
# characters in the file name are taken literally.
if ($file_name =~ /\.gz$/) {
  open IN_FILE, "-|", "gzip", "-dc", $file_name
    or die "failed to open $file_name\n";
} else {
  open IN_FILE, "<", $file_name or die "failed to open $file_name\n";
}

while (<IN_FILE>) {
  # the first word of the first line names the program that produced the
  # results, which tells us whether the hits are DNA or protein entries;
  # the warnings go to standard output and so end up in the page itself
  if ($. == 1) {
    if (/^\s*([^\s]+)/) {
      if (lc $1 eq "blastn" or lc $1 eq "tblastn" or lc $1 eq "tblastx") {
        $db_type = "dna";
      } else {
        if (lc $1 eq "fasta" or lc $1 eq "blastp" or lc $1 eq "blastx") {
          $db_type = "protein";
        } else {
          print "\nWARNING: could not identify file type: $1\n";
        }
      }
    } else {
      print "\nWARNING: could not identify file type\n";
    }
  }


  # ignore header lines - the range is true from input line 1 up to and
  # including the first line that matches one of the start line patterns
  if (1..m/$BLAST_START_LINE|$FASTA_START_LINE/) {
    print;
    next;
  }

  # the first blank line after at least one ID has been collected marks the
  # end of the hit summary
  if (@ids && /^\s*$/) {
    $summary_finished = 1;
  }

  # a candidate ID is the first word of the line, after up to two optional
  # ">" characters and an optional upper case "DB:" prefix, and it must be
  # followed by a space
  if (/^(?:>?>?(?:[A-Z]+:)?)(\w+) /) {
    $id = $1;
    if ($summary_finished) {
      # past the summary: only IDs that were listed in the summary are
      # touched, and only the first time each one is seen
      if ((grep {$_ eq $id} @ids) && (!grep {$_ eq $id} @anchored_ids)) {
        # not anchored yet so make it an anchor
        if (s/\b$id\b/"<a name=\"$id\">" . (hyperlink_id($id)) . "<\/a>"/e) {
          push @anchored_ids, $id;
        }
      }
    } else {
      # still inside the summary: remember the ID
      if (!grep {$_ eq $id} @ids) {
        push @ids, $id;
      }

      # the first occurrence on the line jumps to the anchor on this page
      s/$id/hyperlink_to_anchor($id)/ei;

      # an occurrence that follows a space becomes the SRS link
      if (!s/ $id/" " . hyperlink_id($id)/ei) {
        # if the id occurs once in the line put a link at end of line
        s/$/"  SRS:" . hyperlink_id($id)/e;
      }
    }
  }
  print;
}

' "$file_arg" >> "$new_file"

# Close the <PRE> section and finish the HTML page.  The target is quoted so
# that a path containing spaces is appended to as a single file.
cat <<EOF >> "$new_file"
</PRE>
  </BODY>
</HTML>
EOF

# delete it at some point
# The path is wrapped in single quotes (with any embedded single quote
# escaped) before it is pasted into the command text given to at, so that
# spaces or shell metacharacters in the name cannot alter the command that
# eventually runs.
quoted_file=$(printf '%s\n' "$new_file" | sed "s/'/'\\\\''/g")
printf '%s\n' "rm -f -- '$quoted_file' > /dev/null 2>&1" | at now + 36 hours

# Ask a netscape that is already running to show the page.
# NOTE(review): $NETSCAPE is left unquoted on purpose, in case a customised
# value carries options as well as the program name - confirm before quoting.
if $NETSCAPE -remote "openURL($new_file)"
then
    exit 0
else
    # progress message - sent to stderr like the other diagnostics
    echo starting new netscape 1>&2
    # netscape isn't running - so start it
    ($NETSCAPE &)&

    # now send the URL.  we do things this way so that the script doesn't exit
    # until netscape has successfully shown the URL

    sleep 1

    # don't exit the script until the file is successfully displayed or until
    # 40 seconds is up (20 attempts, two seconds apart)
    for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
    do
        if $NETSCAPE -remote "openURL($new_file)" 2> /dev/null
        then
            exit 0
        else
            sleep 2
        fi
    done

    # the browser never accepted the URL
    exit 1
fi
