#!/bin/sh
#
# download BLAST databases 'pdb' and 'nr' in FASTA format
# and format with formatdb
#

# Print a banner, then determine the platform name by running the
# "platform" script found in the directory named by $SCHRODINGER.
echo "update BLAST databases for use with Prime"
echo ""

# SCHRODINGER locates the platform script; without it we cannot continue.
if [ -z "$SCHRODINGER" ]; then
    echo "ERROR: SCHRODINGER environment variable not set"
    echo "       unable to determine platform"
    exit 1
fi
echo "SCHRODINGER=$SCHRODINGER"

# The platform script is executed below, so it must exist and be executable.
if [ ! -x "$SCHRODINGER/platform" ]; then
    echo "ERROR: $SCHRODINGER/platform script missing or not executable"
    exit 1
fi

# The command path is quoted so that a directory name containing
# whitespace is not split into several words.
PLATFORM=`"$SCHRODINGER/platform" -1`

# The platform name becomes part of the path to the BLAST executables
# further down; an empty value would silently produce a wrong path.
if [ -z "$PLATFORM" ]; then
    echo "ERROR: $SCHRODINGER/platform did not report a platform"
    exit 1
fi
echo "PLATFORM=$PLATFORM"
echo ""

# Locate the Prime thirdparty tree: a non-empty SCHRODINGER_THIRDPARTY
# wins, otherwise the "thirdparty" directory below $SCHRODINGER is used.

echo "determining Prime BLAST database directory ..."
echo ""

if [ -z "$SCHRODINGER_THIRDPARTY" ]; then
    # nothing configured: fall back to the default location
    TPDIR="$SCHRODINGER/thirdparty"
    echo "using default location $SCHRODINGER/thirdparty"
else
    TPDIR="$SCHRODINGER_THIRDPARTY"
    echo "SCHRODINGER_THIRDPARTY=$SCHRODINGER_THIRDPARTY"
fi

echo "Prime thirdparty directory: $TPDIR"

# the BLAST databases are kept in a fixed sub-directory of that tree
BLASTDB_DIR="$TPDIR/database/blast"
echo "BLAST database: $BLASTDB_DIR"

# Sanity checks before anything is modified: the database directory and
# the formatdb program for this platform both have to be present.
[ -d "$BLASTDB_DIR" ] || {
    echo "ERROR: Prime BLAST database $BLASTDB_DIR does not exist"
    exit 1
}

# the BLAST programs are kept per platform
BLASTEXEC_DIR="$TPDIR/bin/$PLATFORM/blast"
echo "BLAST executables: $BLASTEXEC_DIR"

FORMATDB="$BLASTEXEC_DIR/formatdb"
[ -x "$FORMATDB" ] || {
    echo "ERROR: formatdb program $FORMATDB missing or not executable"
    exit 1
}
echo "formatdb program: $FORMATDB"

# the remaining steps use file names relative to the database directory
cd "$BLASTDB_DIR" || {
    echo "unable to change to directory $BLASTDB_DIR"
    exit 1
}

# Archive the existing databases: every formatted database file
# (nr.??? and pdb.???) in the current directory is moved into the
# "archive" sub-directory, which is created when it does not exist yet.
ARCHIVE="archive"
echo ""
echo "archiving existing database"
if [ ! -d "$ARCHIVE" ]; then
    mkdir "$ARCHIVE"
    if [ $? -gt 0 ]; then
        echo "ERROR: unable to create archive directory"
        exit 1
    fi
fi

# Let the shell expand the patterns instead of parsing the output of ls:
# file names are passed on unchanged and nothing is printed to stderr
# when no database has been installed yet.
for file in nr.??? pdb.???; do
    # A pattern without any match is handed through literally; such a
    # word (and anything else that is not a regular file) is skipped.
    if [ ! -f "$file" ]; then
        continue
    fi
    echo "archiving file: $file"
    mv "$file" "$ARCHIVE/$file"
    if [ $? -gt 0 ]; then
        echo "ERROR: failed to move $file to $ARCHIVE directory"
        exit 1
    fi
done

# Download the gzip-compressed 'pdbaa' and 'nr' files from the FTP server
# into the current directory.
FTP_HOST="ftp.ncbi.nih.gov"
FTP_DIR="blast/db/FASTA/"
# password sent for the anonymous login below
FTP_PASSWORD="user@localhost.localdomain"

# Remove leftovers of an earlier run, so that the check after the transfer
# really proves that this run fetched the files.
/bin/rm -f pdbaa.gz nr.gz

#ftp the database files
echo ""
echo "connecting to FTP server $FTP_HOST"
echo ""

# -n switches off auto-login; the explicit "user" command then logs in
# with FTP_PASSWORD, so nobody has to answer a password prompt.
ftp -n <<EOF
open $FTP_HOST
user anonymous $FTP_PASSWORD
cd $FTP_DIR
pwd
ls pdbaa.gz
ls nr.gz
binary
hash
get pdbaa.gz
get nr.gz
quit
EOF

#do not continue if we had problems during the FTP transfer
if [ $? -gt 0 ]; then
    echo ""
    echo "ERROR: problems during FTP transfer"
    echo ""
    exit 1
fi

# The exit status of ftp is not a reliable indicator: many clients return 0
# even when a "get" failed. Therefore insist on non-empty downloads.
for file in pdbaa.gz nr.gz; do
    if [ ! -s "$file" ]; then
        echo ""
        echo "ERROR: problems during FTP transfer"
        echo "       $file is missing or empty"
        echo ""
        exit 1
    fi
done

# Unpack both downloads in place (gzip -d drops the .gz suffix), then
# give the PDB file the name under which it is formatted afterwards.
echo ""
for gz in pdbaa.gz nr.gz; do
    echo "uncompressing database file: $gz"
    gzip -f -d "$gz" || {
        echo "ERROR: unable to uncompress $gz"
        exit 1
    }
done

# 'pdbaa' -> 'pdb'
echo "renaming 'pdbaa' to 'pdb'"
/bin/mv pdbaa pdb || {
    echo "ERROR: unable to rename file 'pdbaa' to 'pdb'"
    exit 1
}

# Format the two FASTA files with formatdb (-p T: protein sequences,
# -o T: parse the sequence ids and build indexes), then clean up.
# A formatting problem is remembered instead of being ignored, because
# the cleanup below must not destroy the archived databases in that case.
format_failed=""
for file in pdb nr; do
    echo "formatting database: $file"
    "$FORMATDB" -i "$file" -p T -o T
    if [ $? -gt 0 ]; then
        echo "WARNING: problems during formatting of BLAST databases"
        format_failed="yes"
    fi
done

#remove the FASTA files
echo "cleaning up ..."
/bin/rm -f pdb
/bin/rm -f nr

# The archive directory holds the only copy of the previous databases.
# Keep it and report failure when the new databases could not be built.
if [ -n "$format_failed" ]; then
    echo ""
    echo "ERROR: formatting failed, previous databases kept in $ARCHIVE directory"
    exit 1
fi

#make sure the permissions on the archive files
#are such that we can actually remove them
chmod -R u+w "$ARCHIVE"
/bin/rm -rf "$ARCHIVE"

echo ""
echo "DONE"


