Patch for I-TASSER 4.0

Hi,

In order to make I-TASSER 4.0 work with our queueing system (Slurm), and to handle certain error situations more gracefully, I put together a rather large patch file.

I've included it below in the hope that it might be useful. I'm sure it could be incorporated into I-TASSER 4.1 with only a modest amount of effort.

Apologies for posting the patch inline; my browser tells me that "file attachments are disabled". Hopefully it is still readable in this form; if not, feel free to contact me and I can email you the patch.

Thanks!


diff -ur /home/brob695/I-TASSER4.0.orig/COACH/COACHmod ./COACH/COACHmod
--- /home/brob695/I-TASSER4.0.orig/COACH/COACHmod 2014-07-31 09:32:25.000000000 +1200
+++ ./COACH/COACHmod 2014-08-14 09:56:51.300041404 +1200
@@ -120,7 +120,7 @@

my $runningjobs="";

-################ submit COFACTOR by qsub
+################ submit COFACTOR by sbatch
my $cofoutput="$outputdir/cofactor";
`mkdir -p $cofoutput`;
my $libfile ="PDBsearchresult_$modelname.dat";
@@ -161,12 +161,12 @@
`chmod a+x $cofoutput/$tag.pl`;
if($runstyle eq "parallel")
{
- $runningjobs=`qstat -f`;
+ $runningjobs=`squeue --format="%24j"`;
if($runningjobs !~ /$tag/)
{
- my $walltime="walltime=50:00:00";
+ my $walltime="50:00:00";
`chmod a+x $cofoutput/$tag.pl`;
- `qsub -e $cofoutput/err_$tag -o $cofoutput/out_$tag -l $walltime -N $tag $cofoutput/$tag.pl`;
+ `sbatch -e $cofoutput/err_$tag -o $cofoutput/out_$tag --time=$walltime -J $tag $cofoutput/$tag.pl`;
print "$tag was submitted!\n";
}
}
@@ -216,8 +216,8 @@
{
if($runningjobs !~ /$tag/)
{
- my $walltime="walltime=24:00:00";
- `qsub -e $tmoutput/err_$tag -o $tmoutput/out_$tag -l $walltime -N $tag $tmoutput/$prog_name`;
+ my $walltime="24:00:00";
+ `sbatch -e $tmoutput/err_$tag -o $tmoutput/out_$tag --time=$walltime -J $tag $tmoutput/$prog_name`;
print "$tag was submitted!\n";
}
}
Only in .: I-TASSER4.0-NeSI-Pan.patch
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/dPPAS2mod ./I-TASSERmod/dPPAS2mod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/dPPAS2mod 2014-07-28 09:49:03.000000000 +1200
+++ ./I-TASSERmod/dPPAS2mod 2014-08-14 17:20:43.888829453 +1200
@@ -152,7 +152,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -178,11 +178,11 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>)
{
if($line=~/Results from round\s+(\d+)/)
@@ -298,7 +298,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -321,7 +321,7 @@
`mv psitmp.mtx protein.mtx`;

########### run zalign #############
-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.mtx\n";
@@ -334,10 +334,10 @@
close(in);

printf "running zalign .....\n";
-`./zalign`;
+system("./zalign") == 0 || die "An error occurred while running zalign. Stop.\n";

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>)
{
@@ -364,7 +364,7 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";

$i_t=0;
for($i=1;$i<=$N_hit;$i++)
@@ -373,7 +373,7 @@
$template_name=~s/\./\\\./mg; #useful for match
$zscore_value=$zscore{$zscore_keys[$i-1]};
######## read the alignment -------->
- open(align,"align.dat");
+ open(align,"align.dat") || die "Could not open align.dat for reading!\n";
while($line=<align>)
{
if($line=~/structureX:$template_name\s*\:/)
@@ -438,7 +438,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>)
{
@@ -506,7 +506,7 @@
$time=`date`;
close(init);

-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -519,7 +519,6 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM system("rm -rf $work_dir");

exit();
-
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/dPPASmod ./I-TASSERmod/dPPASmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/dPPASmod 2014-07-28 09:49:01.000000000 +1200
+++ ./I-TASSERmod/dPPASmod 2014-08-14 17:20:55.377294963 +1200
@@ -152,7 +152,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -178,11 +178,11 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>)
{
if($line=~/Results from round\s+(\d+)/)
@@ -297,7 +297,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -322,7 +322,7 @@
`mv psitmp.mtx protein.mtx`;

########### run zalign #############
-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.mtx\n";
@@ -335,10 +335,10 @@
close(in);

printf "running zalign .....\n";
-`./zalign`;
+system("./zalign") == 0 || die "An error occurred while running zalign. Stop.\n";

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>)
{
@@ -365,7 +365,7 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";

$i_t=0;
for($i=1;$i<=$N_hit;$i++)
@@ -439,7 +439,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>)
{
@@ -507,7 +507,7 @@
$time=`date`;
close(init);

-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -520,9 +520,6 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM system("rm -rf $work_dir");

exit();
-
-
-
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/Env-PPASmod ./I-TASSERmod/Env-PPASmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/Env-PPASmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/Env-PPASmod 2014-08-14 17:21:01.216344116 +1200
@@ -152,7 +152,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -178,11 +178,11 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>)
{
if($line=~/Results from round\s+(\d+)/)
@@ -296,7 +296,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -321,7 +321,7 @@
`mv psitmp.mtx protein.mtx`;

########### run zalign #############
-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.prf\n";
@@ -332,10 +332,10 @@
close(in);

printf "running zalign .....\n";
-`./zalign`;
+system("./zalign") == 0 || die "An error occurred while running zalign. Stop.\n";

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>){
if($line=~/(\d+)\s+(\S+)\s+(\S+)/)
@@ -361,7 +361,7 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";

$i_t=0;
for($i=1;$i<=$N_hit;$i++)
@@ -370,7 +370,7 @@
$template_name=~s/\./\\\./mg; #useful for match
$zscore_value=$zscore{$zscore_keys[$i-1]};
######## read the alignment -------->
- open(align,"align.dat");
+ open(align,"align.dat") || die "Could not open align.dat for reading!\n";
while($line=<align>)
{
if($line=~/structureX:$template_name\s*\:/)
@@ -434,7 +434,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>)
{
@@ -500,7 +500,7 @@
$time=`date`;
close(init);

-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -513,7 +513,6 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM system("rm -rf $work_dir");

exit();
-
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/MUSTERmod ./I-TASSERmod/MUSTERmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/MUSTERmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/MUSTERmod 2014-08-14 17:21:07.798822744 +1200
@@ -237,7 +237,7 @@
else
{
printf "running Psi-blast .....\n";
- `$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk -Q $pdb\_pssm.txt > blast.out`;
+ system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk -Q $pdb\_pssm.txt > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}

#####generate seq.svr.psi and seq.svr.phi ######
@@ -817,7 +817,7 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;
exit();

diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/PPASmod ./I-TASSERmod/PPASmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/PPASmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/PPASmod 2014-08-14 17:21:12.503305521 +1200
@@ -152,7 +152,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -177,13 +177,13 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}

########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>)
{
if($line=~/Results from round\s+(\d+)/)
@@ -298,7 +298,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++)
{
@@ -323,7 +323,7 @@
`mv psitmp.mtx protein.mtx`;

########### run zalign #############
-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.prf\n";
@@ -333,10 +333,10 @@
close(in);

printf "running zalign .....\n";
-`./zalign`;
+system("./zalign") == 0 || die "An error occurred while running zalign. Stop.\n";

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>)
{
@@ -363,7 +363,7 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";

$i_t=0;
for($i=1;$i<=$N_hit;$i++)
@@ -372,7 +372,7 @@
$template_name=~s/\./\\\./mg; #useful for match
$zscore_value=$zscore{$zscore_keys[$i-1]};
######## read the alignment -------->
- open(align,"align.dat");
+ open(align,"align.dat") || die "Could not open align.dat for reading!\n";
while($line=<align>)
{
if($line=~/structureX:$template_name\s*\:/)
@@ -437,7 +437,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>)
{
@@ -504,7 +504,7 @@
$time=`date`;
close(init);

-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -517,7 +517,7 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;

exit();

diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/runI-TASSER.pl ./I-TASSERmod/runI-TASSER.pl
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/runI-TASSER.pl 2014-07-31 09:29:10.000000000 +1200
+++ ./I-TASSERmod/runI-TASSER.pl 2014-08-22 09:53:30.590921576 +1200
@@ -1,68 +1,108 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
+
+use strict;
+use File::Copy;
use Math::Trig;
-use Cwd 'abs_path';
+use Cwd qw(abs_path getcwd);

+# Set this to 1 or some other TRUE value if debugging is desired.
+# Note that $DEBUG is very sparsely used at present.
+my $DEBUG = 0;

+my $curdir = &getcwd;

#########################################################################setpath
-$usrname = "your_user_name"; #useful when making tmp dir.;
-$pkgdir = "/home/$usrname/I-TASSER4.0"; #directory containing README.txt
-$libdir = "/home/$usrname/ITLIB"; #template library directory
-$seqname = "1a2bA"; #sequence name
-$datadir = "/home/$usrname/data/$seqname"; #at least has seq.fasta
-$runstyle = "serial"; #parallel or serial
-$run = "real"; #'real', use all templates; 'benchmark', homology will be removed
-$java_home = "/usr/java/latest"; #java bin will be located at $java_home/bin/java
-$n_temp = 20; #number of templates per threading algorithm
-$nc5 = 5; #number of output models
-$keeptraj = "true"; #whether to keep simulation decoys
-$lbs = "false"; #do not predict ligand-binding sites
-$ecn = "false"; #do not predict EC number
-$got = "false"; #do not predict GO terms
+my $usrname = getpwuid($>); #useful when making tmp dir.;
+my $pkgdir = "/home/$usrname/I-TASSER4.0"; #directory containing README.txt
+my $libdir = "/home/$usrname/ITLIB"; #template library directory
+my $blastdir = "$pkgdir/blast/bin";
+my $seqname = "1a2bA"; #sequence name
+my $datadir = "/home/$usrname/data/$seqname"; #at least has seq.fasta
+my $tmpdir = "/tmp/$usrname/IT$seqname"; #location of temp files
+my $tmpdir_overridden = "";
+my $runstyle = "interactive"; #interactive or batch mode
+my $run = "real"; #'real', use all templates; 'benchmark', homology will be removed
+my $java_home = "/usr/java/latest"; #java bin will be located at $java_home/bin/java
+my $sched_mem = 0; #memory required by batch-scheduled tasks
+my $account = ''; #account for scheduled jobs
+my $when_to_notify = ''; #string telling the scheduler when to notify about changes in job status
+ #(scheduler-dependent)
+my $notify_email = ''; #email address to which to send notifications
+my $n_temp = 20; #number of templates per threading algorithm
+my $nc5 = 5; #number of output models
+my $keeptraj = "true"; #whether to keep simulation decoys
+my $lbs = "false"; #do not predict ligand-binding sites
+my $ecn = "false"; #do not predict EC number
+my $got = "false"; #do not predict GO terms

my $system = $^O;
unless (lc($system) eq "linux")
{
- printf("Your operating system $system is unsupported at this time\n");
- printf("Currently only Linux is supported\n");
- exit();
+ die("Your operating system $system is unsupported at this time.\n",
+ "Currently only Linux is supported\n");
}

-
-
-
-
-$id_cut=1.0; #cut-off of sequence idendity
-if($run ne "benchmark")
+# Set the sequence identity cutoff
+my $id_cut = 1.0;
+if ($run ne "benchmark")
{
- $run="real"; #cut-off of sequence idendity
+ $run="real";
}
-
-
-if($run eq "benchmark")
+else
{
- $id_cut=0.3; #cut-off of sequence idendity
+ $id_cut=0.3;
}

################parse arguments
-$totarg=0;
-foreach $w(@ARGV)
-{
- $totarg++;
-}
-if($totarg==0)
+# It seems necessary to have at least two arguments.
+if ($#ARGV < 1)
{
&print_help();
exit();
}
+my @flagmand = qw();
$flagmand[0]=0;
$flagmand[1]=0;
$flagmand[2]=0;
$flagmand[3]=0;
$flagmand[4]=0;
$flagmand[5]=0;
-$this_hours=-1;
-for($i=0;$i<$totarg-1;$i++)
+
+my $errfile;
+my $homolog = "";
+my $hour = 50;
+my $ID;
+my @inits;
+my $jobfile;
+my $lcpdb;
+my $light = "";
+my $line1;
+my %LINELOG1;
+my $log1file = "";
+my $N;
+my $outfile;
+my $pdbchain;
+my $pdbid;
+my $seqid;
+my $time;
+my $useremail;
+my $userip;
+my $r11;
+my $r22;
+my $r33;
+my $r44;
+my $restraint1 = "";
+my $restraint2 = "";
+my $restraint3 = "";
+my $restraint4 = "";
+my $rst;
+my $str1 = "";
+my %tagnames;
+my $this_hours = -1;
+my $this_walltime = "00:00:00";
+my $walltime;
+
+for(my $i = 0; $i < $#ARGV; $i++)
{
$str1=substr($ARGV[$i+1],0,1);
if($str1 eq "-")
@@ -88,12 +128,38 @@
{
$datadir=$ARGV[$i+1];
$flagmand[3]=1;
- }
+ }
+ elsif($ARGV[$i] eq "-tmpdir")
+ {
+ $tmpdir=$ARGV[$i+1];
+ $tmpdir_overridden="true";
+ # no flagmand is necessary as tmpdir is not a mandatory setting
+ }
+ elsif($ARGV[$i] eq "-blastdir")
+ {
+ $blastdir=$ARGV[$i+1];
+ }
elsif($ARGV[$i] eq "-java_home")
{
$java_home=$ARGV[$i+1];
$flagmand[5]=1;
- }
+ }
+ elsif($ARGV[$i] eq "-schedmem")
+ {
+ $sched_mem=$ARGV[$i+1];
+ }
+ elsif($ARGV[$i] eq "-account")
+ {
+ $account=$ARGV[$i+1];
+ }
+ elsif($ARGV[$i] eq "-when-to-notify")
+ {
+ $when_to_notify=$ARGV[$i+1];
+ }
+ elsif($ARGV[$i] eq "-email")
+ {
+ $notify_email=$ARGV[$i+1];
+ }
elsif($ARGV[$i] eq "-runstyle")
{
$runstyle=$ARGV[$i+1];
@@ -104,21 +170,23 @@
}
elsif($ARGV[$i] eq "-idcut")
{
- $id_cut=$ARGV[$i+1];
- if($id_cut<0.0)
+ $id_cut = $ARGV[$i+1];
+ # id_cut must be between 0 and 1
+ if($id_cut < 0.0)
{
- $id_cut=0.0;
+ $id_cut = 0.0;
}
- elsif($id_cut>1.0)
+ elsif($id_cut > 1.0)
{
- $id_cut=1.0;
+ $id_cut = 1.0;
}
}

elsif($ARGV[$i] eq "-ntemp")
{
$n_temp=$ARGV[$i+1];
- if($n_temp>50 || $n_temp<1)
+ # ntemp must be between 1 and 50
+ if($n_temp > 50 || $n_temp < 1)
{
$n_temp=20;
}
@@ -176,129 +244,149 @@
{
$got=$ARGV[$i+1];
}
+ elsif($ARGV[$i] =~ m/^-/)
+ {
+ &print_help();
+ die "Unrecognized flag: $ARGV[$i]\n\n";
+ }
}

if($flagmand[0]==0)
{
&print_help();
- print "please set -pkgdir\n\n";
- exit();
+ die "please set -pkgdir\n\n";
}
if($flagmand[1]==0)
{
&print_help();
- print "please set -libdir\n\n";
- exit();
+ die "please set -libdir\n\n";
}
if($flagmand[2]==0)
{
&print_help();
- print "please set -seqname\n\n";
- exit();
+ die "please set -seqname\n\n";
}
if($flagmand[3]==0)
{
&print_help();
- print "please set -datadir\n\n";
- exit();
-}
-if($flagmand[4]==0)
-{
- #&print_help();
- #print "please set -usrname\n\n";
- #exit();
+ die "please set -datadir\n\n";
}
+#if($flagmand[4]==0)
+#{
+# &print_help();
+# print "please set -usrname\n\n";
+# exit();
+#}
if($flagmand[5]==0)
{
&print_help();
- print "please set -java_home\n\n";
- exit();
+ die "please set -java_home\n\n";
}

-
if($run eq "real" && $id_cut<1.0)
{
$id_cut=1.0;
}

-if($runstyle ne "parallel") #if a wrong value
+if($runstyle ne "batch") #if a wrong value
{
- $runstyle = "serial";
+ $runstyle = "interactive";
}
else
{
- if ( !( `which qstat` ) || !( `which qsub` ))
+ if ( !( `which squeue` ) || !( `which sbatch` ))
+ {
+ die("\n",
+ "You can not set \"-runstyle=batch\" because your system does not support\n",
+ "the commands: squeue and sbatch.\n",
+ "To support squeue and sbatch, you have to install the Slurm job scheduler.\n");
+ }
+ $sched_mem ||= $ENV{ITASSER_SCHEDMEM};
+ if ($sched_mem !~ m/^\d+$/ || $sched_mem < 3072)
+ {
+ &print_help;
+ die("Error: At least 3072 MB of RAM must be requested using the -schedmem\n",
+ "flag or the ITASSER_SCHEDMEM environment variable!\n");
+ }
+ $account ||= $ENV{ITASSER_SCHEDACCT};
+ if (!$account)
+ {
+ &print_help;
+ die("Error: An account must be specified using the -account flag or the\n",
+ "ITASSER_SCHEDACCT environment variable!\n");
+ }
+ $when_to_notify ||= $ENV{ITASSER_NOTIFYWHEN};
+ if (!$when_to_notify)
{
- print "\nYou can not set \"-runstyle =parallel\" because your system dose not support the commands: qsub and qstat.\n";
- print "To support qsub and qstat, you have to install the job management software PBS server.\n\n";
- exit;
+ &print_help;
+ die("Error: Notification triggers must be specified using the\n",
+ "-when-to-notify flag or the ITASSER_NOTIFYWHEN environment variable!\n");
+ }
+ $notify_email ||= $ENV{ITASSER_NOTIFYEMAIL};
+ if (!$notify_email)
+ {
+ &print_help;
+ die("Error: A notification email address must be supplied using the -email\n",
+ "flag or the ITASSER_NOTIFYEMAIL environment variable!\n");
}
}

if ( !( `which tar` ))
{
- print "Your system does not have the compression software \"tar\" installed\n";
- exit;
+ die "Your system does not have the compression software \"tar\" installed!\n";
}
if ( !( `which bzip2` ))
{
- print "Your system does not have the compression software \"bzip2\" installed\n";
- exit;
+ die "Your system does not have the compression software \"bzip2\" installed!\n";
}

-if($light eq "true")
-{
-}
-else
+# Update tmpdir to SCRATCH_DIR if appropriate
+if ($ENV{SCRATCH_DIR} && !$tmpdir_overridden)
{
- $light="false";
+ #print "Note: Overriding default temporary directory\n";
+ #print "New temporary directory: $ENV{SCRATCH_DIR}\n";
+ $tmpdir = $ENV{SCRATCH_DIR};
}

+$light = "false" if ($light ne "true");

-$hour=50; #$time=$hour_max if(Lch>220), 24*3=72, 24*4=96, 24*5=120
-if($light eq "true")
-{
- $hour = 5;
- if($this_hours > 0)
- {
- $hour=$this_hours;
- }
-}
-
+#$time=$hour_max if(Lch>220), 24*3=72, 24*4=96, 24*5=120
+$hour = 5 if ($light eq "true");
+$hour = $this_hours if($this_hours > 0);

my $fname=""; # in case the user provide a file name
($datadir, $fname)=&get_absolute_path($datadir);
-$usrname=getlogin() || $ENV{'USER'};
-
+#$usrname=getlogin() || $ENV{'USER'};

-printf "\nYour setting for running I-TASSER is:\n";
-printf "-pkgdir = $pkgdir\n";
-printf "-libdir = $libdir\n";
-printf "-java_home = $java_home\n";
-printf "-seqname = $seqname\n";
-printf "-datadir = $datadir\n";
-printf "-runstyle = $runstyle\n";
-printf "-homoflag = $run\n";
-printf "-idcut = $id_cut\n";
-printf "-ntemp = $n_temp\n";
-printf "-nmodel = $nc5\n";
-printf "-light = $light\n";
-printf "-hours = $hour\n";
-printf "-LBS = $lbs\n";
-printf "-EC = $ecn\n";
-printf "-GO = $got\n\n";
+print "Your setting for running I-TASSER is:\n";
+print "-pkgdir = $pkgdir\n";
+print "-libdir = $libdir\n";
+print "-blastdir = $blastdir\n";
+print "-java_home = $java_home\n";
+print "-seqname = $seqname\n";
+print "-datadir = $datadir\n";
+print "-tmpdir = $tmpdir\n";
+print "-runstyle = $runstyle\n";
+print "-homoflag = $run\n";
+print "-idcut = $id_cut\n";
+print "-ntemp = $n_temp\n";
+print "-nmodel = $nc5\n";
+print "-light = $light\n";
+print "-hours = $hour\n";
+print "-LBS = $lbs\n";
+print "-EC = $ecn\n";
+print "-GO = $got\n\n";

-$blastdir = "$pkgdir/blast/bin";
-$db = "$libdir/nr/nr";
+#$blastdir = "$pkgdir/blast/bin";
+my $db = "$libdir/nr/nr";

if(!-s "$db")
{
- print "Your library files are not complete, please download library with the script download_lib.pl\n";
- exit;
+ die "Some I-TASSER libraries are missing. Please run the script download_lib.pl before continuing.\n";
}

###############################
-%ts=(
+my %ts=(
'GLY'=>'G',
'ALA'=>'A',
'VAL'=>'V',
@@ -375,84 +463,79 @@
'Z'=>'GLN',
'X'=>'GLY',
);
+
+my $errmessage = '';
+
#################### jobstart ###################################################
-if(!-s "$datadir/seq.fasta")
+if (!-s "$datadir/seq.fasta")
{
- print "Error: no input file $datadir/seq.fasta\n";
- goto ITpositionend;
+ die "Error: Input file $datadir/seq.fasta not found!\n";
}

-if(!-s "$java_home/bin/java" && !-s "$datadir/model1.pdb")
+if (!-s "$java_home/bin/java" && !-s "$datadir/model1.pdb")
{
- print "Error: $java_home/bin/java does not exist\nPlease check if -java_home was correct\n";
- goto ITpositionend;
+ die("Error: $java_home/bin/java does not exist!\n",
+ "Please check if -java_home is set correctly.\n");
}

-
-
-$unicode="IT$seqname"; #useful when making tmp dir and setting unique jobname
-$tmpdir="/tmp/$usrname/$unicode";
-`mkdir -p $tmpdir`;
+my $unicode="IT$seqname"; #useful when making tmp dir and setting unique jobname
+#$tmpdir="/tmp/$usrname/$unicode";
+system("mkdir -p $tmpdir") == 0 || die "Unable to create temporary directory: $tmpdir\n";
chdir "$tmpdir";
-if(!-d "$tmpdir")
-{
- printf "Fail make temporary directory $tmpdir\n";
- print "Please make sure the directory /tmp/ is writable for you\n ";
- exit();
-}

ITposition1:;
-printf "1. make seq.txt and rmsinp\n";
+print "Step 1 of 7: Generating seq.txt and rmsinp files ...\n";
############convert 'seq.fasta' to 'seq.txt' with standard format ####
if(-s "$datadir/seq.txt" && -s "$datadir/rmsinp")
{
#goto ITposition2;
}

-open(seqtxt,"$datadir/seq.fasta");
-$sequence="";
-while($line=<seqtxt>)
+open(SEQTXT,"$datadir/seq.fasta") || die "Could not open file for reading: $datadir/seq.fasta\n";
+my $sequence="";
+my $line = "";
+while($line=<SEQTXT>)
{
- goto pos1_1 if($line=~/^>/);
- if($line=~/(\S+)/)
+ if($line=~/^>/)
{
- $sequence .=$1;
+ next;
+ }
+ elsif($line=~/(\S+)/)
+ {
+ $sequence .= $1;
}
- pos1_1:;
}
-close(seqtxt);
-$Lch=length $sequence;
+close(SEQTXT);
+my $Lch=length $sequence;
if($Lch<10 || $Lch >1500)
{
- printf "error: the sequence length is not in the range [10, 1500]\n";
- goto ITpositionend;
+ die "Error: the sequence length should be at least 10 and at most 1,500!\n";
}

-open(fasta,">$datadir/seq.txt");
-printf fasta "> $seqname\n";
-for($i=1;$i<=$Lch;$i++)
+open(FASTA,">$datadir/seq.txt") || die "Could not create or open file for writing: $datadir/seq.txt\n";
+print FASTA "> $seqname\n";
+for(my $i=1;$i<=$Lch;$i++)
{
- $seq1=substr($sequence,$i-1,1);
- $seq2=$ts{$seq1};
+ my $seq1=substr($sequence,$i-1,1);
+ my $seq2=$ts{$seq1};
$seq1=$ts{$seq2};
- printf fasta "$seq1";
+ print FASTA "$seq1";
if(int($i/60)*60==$i)
{
- printf fasta "\n";
+ print FASTA "\n";
}
}
-printf fasta "\n";
-close(fasta);
+print FASTA "\n";
+close(FASTA);

print "Your protein contains $Lch residues:\n";
print `cat $datadir/seq.txt`;

-open(rmsinp,">$datadir/rmsinp");
-printf rmsinp "1 $Lch\n";
-printf rmsinp "$Lch\n";
-printf rmsinp "protein\n";
-close(rmsinp);
-
+open(RMSINP,">$datadir/rmsinp") || die "Could not create or open file for writing: $datadir/rmsinp\n";
+print RMSINP "1 $Lch\n";
+print RMSINP "$Lch\n";
+print RMSINP "protein\n";
+close(RMSINP);

if(-s "$datadir/model1.pdb")
{
@@ -463,99 +546,94 @@
ITposition2:;
########### run psi-blast #######################

-printf "2.1 run Psi-blast\n";
+print "Step 2.1 of 7: Running Psi-blast ...\n";
if(-s "$datadir/psitmp.chk" && -s "$datadir/blast.out" && -s "$datadir/pssm.txt")
{
goto pos2_1;
}
else
{
- `cp $datadir/seq.txt $tmpdir/protein.fasta`;
- `$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.fasta -C psitmp.chk -Q pssm.txt > blast.out`;
- if(!-s "psitmp.chk")
- {
- print "$blastdir/blastpgp is not working properly, please check\n";
- exit;
- }
- `cp psitmp.chk $datadir/psitmp.chk`;
- `cp blast.out $datadir/blast.out`;
- `cp pssm.txt $datadir/pssm.txt`;
+ # Use of system instead of backticks lets us capture the exit code
+ # if necessary
+ copy("$datadir/seq.txt", "$tmpdir/protein.fasta") || die "Could not copy $datadir/seq.txt to $tmpdir/protein.fasta\n";
+ system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.fasta -C psitmp.chk -Q pssm.txt > blast.out");
+ copy("psitmp.chk", "$datadir/psitmp.chk") || die "Could not copy psitmp.chk into $datadir\n";
+ copy("blast.out", "$datadir/blast.out") || die "Could not copy blast.out into $datadir\n";
+ copy("pssm.txt", "$datadir/pssm.txt") || die "Could not copy pssm.txt into $datadir\n";
}
pos2_1:;
########### make mtx ####################
if(-s "$datadir/mtx")
{
- goto pos2_2;
+ goto pos2_2;
}

if(-s "$tmpdir/protein.fasta")
{
- `cp $tmpdir/protein.fasta $tmpdir/psitmp.fasta`;
+ copy("$tmpdir/protein.fasta", "$tmpdir/psitmp.fasta") || die "Could not copy $tmpdir/protein.fasta to $tmpdir/psitmp.fasta\n";
}
else
{
- `cp $datadir/seq.txt $tmpdir/psitmp.fasta`;
+ copy("$datadir/seq.txt", "$tmpdir/psitmp.fasta") || die "Could not copy $datadir/seq.txt to $tmpdir/psitmp.fasta\n";
}
-`echo psitmp.fasta > psitmp.sn`;
+system("echo psitmp.fasta > psitmp.sn");
if(!-s "$tmpdir/psitmp.chk")
{
- `cp $datadir/psitmp.chk $tmpdir/psitmp.chk`;
+ copy("$datadir/psitmp.chk", "$tmpdir/psitmp.chk") || die "Could not copy psitmp.chk from $datadir to $tmpdir\n";
}
-`echo psitmp.chk > psitmp.pn`;
-`$blastdir/makemat -P psitmp`;
-`cp psitmp.mtx $datadir/mtx`;
-`cp psitmp.mtx mtx`;
+system("echo psitmp.chk > psitmp.pn");
+system("$blastdir/makemat -P psitmp");
+copy("psitmp.mtx", "$datadir/mtx") || die "Could not copy psitmp.mtx into $datadir\n";
+copy("psitmp.mtx", "mtx") || die "Could not copy psitmp.mtx into the current directory\n";

pos2_2:;

########### run PSSpred to predict the secondary structure ########################
-`cp $datadir/seq.fasta .`;
+copy("$datadir/seq.fasta", "seq.fasta") || die "Could not copy seq.fasta from $datadir\n";
if(!-s "$datadir/seq.ss")
{
- print "2.2 Predict secondary structure with PSSpred...\n";
+ print "Step 2.2 of 7: Predicting secondary structure with PSSpred ...\n";
if(-s "seq.fasta")
{
- `perl $pkgdir/PSSpred/mPSSpred.pl seq.fasta $pkgdir $libdir`;
+ system("perl $pkgdir/PSSpred/mPSSpred.pl seq.fasta $pkgdir $libdir");
}
elsif(-s "seq.txt")
{
- `perl $pkgdir/PSSpred/mPSSpred.pl seq.txt $pkgdir $libdir`;
+ system("perl $pkgdir/PSSpred/mPSSpred.pl seq.txt $pkgdir $libdir");
}
else
{
- printf "error: without $datadir/seq.fasta && $datadir/seq.txt\n";
- goto ITpositionend;
+ die "Error: neither $datadir/seq.ss, nor seq.fasta, nor seq.txt is present!\n";
}
- `cp mtx $datadir/` if(-s "mtx");
- `cp blast.out $datadir/` if(-s "blast.out");
- `cp pssm.txt $datadir/` if(-s "pssm.txt");
- `cp psitmp.chk $datadir/` if(-s "psitmp.chk");
- `cp seq.dat $datadir/` if(-s "seq.dat");
- `cp seq.dat.ss $datadir/seq.ss` if(-s "seq.dat.ss");
+ my @files = qw(mtx blast.out pssm.txt psitmp.chk seq.dat seq.dat.ss);
+ foreach my $file (@files) {
+ if (-s "$file") {
+ copy("$file", "$datadir/$file") || die "Could not copy $file into $datadir\n";
+ }
+ }
}
else
{
- print "2.2 Secondary structure prediction was done before.\n";
-
+ print "Step 2.2 of 7 (secondary structure prediction) has already been done\n";
}

-printf "2.3 Predict solvent accessibility...\n";
+print "Step 2.3 of 7: Predicting solvent accessibility ...\n";
pos2_3:;
if(-s "$datadir/exp.dat")
{
- open(explist,"$datadir/exp.dat");
- $n_line=0;
- while($expline=<explist>)
+ open(EXPLIST,"$datadir/exp.dat") || die "Could not open file for reading: $datadir/exp.dat\n";
+ my $n_line=0;
+ while(<EXPLIST>)
{
$n_line++;
}
- close(explist);
+ close(EXPLIST);
if($n_line>=2)
{
goto pos2_4;
}
}
-@ww=qw(
+my @ww=qw(
0.05
0.10
0.15
@@ -574,67 +652,71 @@
0.80
0.85
);
-`cp $pkgdir/bin/solve .`;
-`cp $pkgdir/data/wgt.tar.bz2 .`;
-`tar -jxvf wgt.tar.bz2`;
-`cp $datadir/pssm.txt protein.mat3`;
+copy("$pkgdir/data/wgt.tar.bz2", "wgt.tar.bz2") || die "Could not copy wgt.tar.bz2 from $pkgdir/data\n";
+system("tar -jxvf wgt.tar.bz2") == 0 || die "Could not untar file wgt.tar.bz2\n";
+copy("$datadir/pssm.txt", "protein.mat3") || die "Could not copy $datadir/pssm.txt to protein.mat3\n";

+my %exp;
############# run solvent ##################
-foreach $w(@ww)
+foreach my $w(@ww)
{
- `./solve weight.$w protein`; #protein.neu
- open(neu,"protein.neu");
- <neu>=~/(\d+)/;
+ system("$pkgdir/bin/solve weight.$w protein");
+ open(NEU,"protein.neu") || die "After running \"solve\" program with weight $w, could not open file protein.neu for reading!\n";
+ <NEU>=~/(\d+)/;
$Lch=$1;
- for($i=1;$i<=$Lch;$i++)
+ for(my $i=1;$i<=$Lch;$i++)
{
- <neu>=~/(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
+ <NEU>=~/(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
$exp{$i,$w}=$5;
}
- close(neu);
+ close(NEU);
}
-open(exp1,">protein.exp");
-printf exp1 "$Lch ";
-foreach $w(@ww)
+open(EXP1,">protein.exp") || die "Could not create or open file protein.exp for writing!\n";
+printf EXP1 "$Lch ";
+foreach my $w(@ww)
{
- printf exp1 " %5s",$w;
+ printf EXP1 " %5s",$w;
}
-printf exp1 "\n";
-for($i=1;$i<=$Lch;$i++)
+printf EXP1 "\n";
+for(my $i=1;$i<=$Lch;$i++)
{
- printf exp1 "%5d",$i;
- foreach $w(@ww)
+ printf EXP1 "%5d",$i;
+ foreach my $w(@ww)
{
- printf exp1 " %5d",$exp{$i,$w};
+ printf EXP1 " %5d",$exp{$i,$w};
}
- printf exp1 "\n";
+ printf EXP1 "\n";
}
-close(exp1);
-`cp protein.exp $datadir/exp.dat`;
-if(!-s "$datadir/exp.dat") #in case it fails
-{
+close(EXP1);
+if (! copy("protein.exp", "$datadir/exp.dat")) {
print "Solvent accessibility prediction fails, re-run...\n";
goto pos2_3;
}

pos2_4:;
-printf "2.4 run pairmod\n";
+printf "Step 2.4 of 7: Running pairmod ...\n";
if(-s "$datadir/pair1.dat" && -s "$datadir/pair3.dat")
{
print "pair exist\n";
goto ITposition3;
}
-`cp $pkgdir/bin/align .`;
-`cp -f $pkgdir/data/matrix1.comm .`;
-`cp -f $pkgdir/data/matrix3.comm .`;
-`cp -f $pkgdir/data/blosum.comm .`;
-`cp -f $pkgdir/bin/pair99 ./pair`;
-`cp -f $datadir/seq.dat .`;
+
+#my @files_to_delete = qw(align pair matrix1.comm matrix3.comm blosum.comm seq.dat);
+my @files_to_delete = qw(matrix1.comm matrix3.comm blosum.comm seq.dat);
+foreach my $file_to_delete (@files_to_delete) {
+ print "Warning: Unable to delete file: $file_to_delete\n" if (-e $file_to_delete && !unlink($file_to_delete)); # 'unlink "x" || print' never warned: || bound to the filename
+}
+#copy("$pkgdir/bin/align", "align") || die "Could not copy $pkgdir/bin/align!\n";
+copy("$pkgdir/data/matrix1.comm", "matrix1.comm") || die "Could not copy $pkgdir/data/matrix1.comm!\n";
+copy("$pkgdir/data/matrix3.comm", "matrix3.comm") || die "Could not copy $pkgdir/data/matrix3.comm!\n";
+copy("$pkgdir/data/blosum.comm", "blosum.comm") || die "Could not copy $pkgdir/data/blosum.comm!\n";
+#copy("$pkgdir/bin/pair99", "pair") || die "Could not copy $pkgdir/bin/pair99!\n";
+copy("$datadir/seq.dat", "seq.dat") || die "Could not copy $datadir/seq.dat!\n";
# read sequence file ------------------>
-@seqtxts=`cat $datadir/seq.txt`;
+my @seqtxts=`cat $datadir/seq.txt`;
$sequence="";
-foreach $seqtxt(@seqtxts)
+foreach my $seqtxt(@seqtxts)
{
goto pos2_4_1 if($seqtxt=~/\>/);
$seqtxt=~s/\s//mg;
@@ -645,62 +727,69 @@
$Lch=length $sequence;

# make sequence file ------------------>
-open(seq,">protein.seq");
-printf seq ">protein\n";
-for($i=1;$i<=$Lch;$i++)
+open(SEQ,">protein.seq") || die "Could not create or open file protein.seq for writing!\n";
+printf SEQ ">protein\n";
+for(my $i=1;$i<=$Lch;$i++)
{
- $a=substr($sequence,$i-1,1);
- printf seq "$a";
+ my $a=substr($sequence,$i-1,1);
+ printf SEQ "$a";
if($i==int($i/60)*60)
{
- printf seq "\n";
+ printf SEQ "\n";
}
}
-printf seq "\n";
-close(seq);
+printf SEQ "\n";
+close(SEQ);

######## make msa.aln file ------------------>

-open(msa,">$seqname\.aln");
-printf msa "%5d %5d $seqname\n",1,$Lch;
-for($j=1;$j<=$Lch;$j++)
+open(MSA,">$seqname\.aln") || die "Could not create or open file $seqname\.aln for writing!\n";
+printf MSA "%5d %5d $seqname\n",1,$Lch;
+
+my $m;
+for(my $j=1;$j<=$Lch;$j++)
{
- $a=substr($sequence,$j-1,1);
- printf msa "$a";
+ my $a=substr($sequence,$j-1,1);
+ printf MSA "$a";
$m=0;
if($j==int($j/50)*50)
{
- printf msa "\*\n";
+ printf MSA "\*\n";
$m=1;
}
}
if($m==0)
{
- printf msa "\n";
+ printf MSA "\n";
}
-close(msa);
+close(MSA);
### decided library list -------------------------------->
-$plibdir="$libdir";
+my $n;
+my $plibdir="$libdir";
if($run eq "real" || $id_cut >=1.0)
{
- printf "2.4.1 Use all templates\n";
- `cp $plibdir/PDB/list ./list`;
+ print "Step 2.4.1 of 7: Copying templates ...\n";
+ copy("$plibdir/PDB/list", "list") || die "Could not copy file $plibdir/PDB/list!\n";
}
else
{
- printf "2.4.1 removing homology templates based on $run and $id_cut\n";
- open(listall,"$plibdir/PDB/list");
- <listall>=~/(\d+)/;
+ print "Step 2.4.1 of 7: Removing homology templates based on $run and $id_cut ...\n";
+ open(LISTALL,"$plibdir/PDB/list") || die "Could not open file $plibdir/PDB/list for reading!\n";
+ <LISTALL>=~/(\d+)/;
$n=$1; #total number of templates
- $k=0;
- for($i=1;$i<=$n;$i++)
- {
- <listall>=~/(\S+)/;
- $p=$1;
- $ali=`./align protein.seq $plibdir/PDB/$p\.pdb 2`;
+ my $k=0;
+ my %pp;
+ for(my $i=1;$i<=$n;$i++)
+ {
+ <LISTALL>=~/(\S+)/;
+ my $p=$1;
+ my $ali=`$pkgdir/bin/align protein.seq $plibdir/PDB/$p\.pdb 2`;
+ if ($? != 0) {
+ die "Error: Alignment of protein sequence with $plibdir/PDB/$p\.pdb failed!\n";
+ }
$ali=~/Identical length\:\s*(\d+)/;
- $L_id=$1;
- $id=$L_id/$Lch;
+ my $L_id=$1;
+ my $id=$L_id/$Lch;
#printf "$p $id $i\n";
if($id<$id_cut)
{
@@ -709,45 +798,47 @@
$pp{$k}=$p;
}
}
- close(listall);
- $K=$k;
- open(list,">list");
- printf list "$K\n";
- for($i=1;$i<=$K;$i++)
- {
- printf list "$pp{$i}\n";
- }
- close(list);
-}
-open(tmp,">LIST.target");
-print tmp "1\n";
-print tmp "$seqname\n";
-close(tmp);
-open(tmp,">$seqname\.homol");
-print tmp "0\n";
-close(tmp);
-
+ close(LISTALL);
+ my $K=$k;
+ open(LIST,">list") || die "Could not create or open file \"list\" for writing!\n";
+ printf LIST "$K\n";
+ for(my $i=1;$i<=$K;$i++)
+ {
+ printf LIST "$pp{$i}\n";
+ }
+ close(LIST);
+}
+open(TMP,">LIST.target") || die "Could not create or open file LIST.target for writing!\n";
+print TMP "1\n";
+print TMP "$seqname\n";
+close(TMP);
+open(TMP,">$seqname\.homol") || die "Could not create or open file $seqname\.homol for writing!\n";
+print TMP "0\n";
+close(TMP);

-
-if($runstyle eq "parallel") #submit threading first when use qsub to save time
+if($runstyle eq "batch") #submit threading first when use sbatch to save time
{
print "submit threading jobs first and run pair during threading\n";
goto ITposition3;
}

# run pair ------------------------------------------->
-printf "2.4.2 running pair ................\n";
-print `./pair $plibdir/`;
-print "pair done\n";
-`sync`; #flush filesystem buffers
+print "Step 2.4.2 of 7: Running pair ...\n";
+print `$pkgdir/bin/pair99 $plibdir/`;
+if ($? == 0) {
+ print "Pair done.\n";
+} else {
+ die "Error: Pair failed!\n";
+}
+system("sync") == 0 || die "Buffer synchronisation failed!\n"; #flush filesystem buffers
sleep(1);
# copy back the output files -------------------------->
-`cp -f pair.3 $datadir/pair3.dat`;
-`cp -f pair.1 $datadir/pair1.dat`;
+copy("pair.3", "$datadir/pair3.dat") || die "Could not copy pair.3 to $datadir as pair3.dat!\n";
+copy("pair.1", "$datadir/pair1.dat") || die "Could not copy pair.1 to $datadir as pair1.dat!\n";

ITposition3:;
-printf "3.1 do threading\n";
-@TT=qw(
+print "Step 3.1 of 7: Doing threading ...\n";
+my @TT=qw(
PPAS
dPPAS
dPPAS2
@@ -758,8 +849,8 @@
wMUSTER
); #threading programs

-$totinit=0;
-foreach $T(@TT)
+my $totinit=0;
+foreach my $T(@TT)
{
if(-s "$datadir/init.$T")
{
@@ -771,16 +862,19 @@
#goto pos3_1;
}

-
-foreach $T(@TT)
+foreach my $T(@TT)
{
if(!-s "$datadir/init.$T")
{
- $tag="$T\_$seqname"; # unique name
- $jobmod="$T"."mod";
- $jobname="$datadir/$tag";
- #------- jobname ------>
- $mod=`cat $pkgdir/I-TASSERmod/$jobmod`;
+ my $tag="$T\_$seqname"; # unique name
+ my $jobmod="$T"."mod";
+ $jobfile="$datadir/$tag";
+ my $slurmfile="$jobfile.sbatch";
+ #------- jobfile ------>
+ my $mod=`cat $pkgdir/I-TASSERmod/$jobmod`;
+ if ($? != 0) {
+ die "Error: Unable to read job template from $pkgdir/I-TASSERmod/$jobmod!\n";
+ }
$mod=~s/\!S\!/$seqname/mg;
$mod=~s/\!O\!//mg;
$mod=~s/\!ID_CUT\!/$id_cut/mg;
@@ -795,30 +889,69 @@
$mod=~s/\!NRLIB\!/$db/mg;
$mod=~s/\!RUN\!/$run/mg;
$mod=~s/\!JAVA_PATH\!/$java_home/mg;
- open(job,">$jobname");
- print job "$mod\n";
- close(job);
- `chmod a+x $jobname`;
+
+ open(JOB,">$jobfile") || die "Could not create or open $jobfile for writing!\n";
+
+ if ($runstyle eq "batch")
+ {
+ # Overwrite the working directory
+ $mod =~ s/work_dir=\"\/tmp.*$/work_dir="\$ENV{SCRATCH_DIR}";/mg;
+ open(SLURMFILE,">$slurmfile") || die "Could not create or open $slurmfile for writing!\n";
+ # Print some things to the job that have to be there
+ print SLURMFILE "#!/bin/bash\n";
+ print SLURMFILE "\n";
+ print SLURMFILE "#SBATCH -J $tag\n";
+ print SLURMFILE "#SBATCH -A $account\n";
+ if ($this_walltime ne '00:00:00')
+ {
+ print SLURMFILE "#SBATCH -t $this_walltime\n";
+ }
+ else
+ {
+ print SLURMFILE "#SBATCH -t 72:00:00\n";
+ }
+ # In the original Perl, 1000 MB of memory are requested
+ print SLURMFILE "#SBATCH --mem-per-cpu=$sched_mem"."M\n";
+ print SLURMFILE "#SBATCH --workdir=$datadir\n";
+ print SLURMFILE "#SBATCH -o out_$tag\n";
+ print SLURMFILE "#SBATCH -e err_$tag\n";
+ print SLURMFILE "#SBATCH --mail-type=$when_to_notify\n";
+ print SLURMFILE "#SBATCH --mail-user=$notify_email\n";
+ print SLURMFILE "\n";
+ print SLURMFILE "echo \"Initial environment variables\"\n";
+ print SLURMFILE "echo \"-----------------------------\"\n";
+ print SLURMFILE "env | sort\n";
+ print SLURMFILE "/usr/bin/perl $jobfile\n";
+ print SLURMFILE "echo \"Final environment variables\"\n";
+ print SLURMFILE "echo \"---------------------------\"\n";
+ print SLURMFILE "env | sort\n";
+ close(SLURMFILE);
+ }
+ if ($DEBUG) {
+ $mod =~ s/work_dir=.*$/work_dir="$datadir\/tmp\/$T";/mg;
+ }
+ print JOB "$mod\n";
+ close(JOB);
+ # I don't think the job description file has to be executable in
+ # Slurm.
printf "start $runstyle threading $T\n";
- if($runstyle eq "parallel")
+ if($runstyle eq "batch")
{
- my $running=`qstat -f`;
+ my $running=`squeue -u $usrname --format="%24j"`;
if($running =~ /$tag/)
{
print "$tag is running, skip\n";
next;
}
- $errfile ="$datadir/err_$tag";
- $outfile ="$datadir/out_$tag";
- $walltime ="walltime=72:00:00,mem=1000mb";
- $bsub=`qsub -e $errfile -o $outfile -l $walltime -N $tag $jobname`;
+ system("sbatch $slurmfile") == 0 || die "Job submission to Slurm failed!\n";
}
else
{
- print `pwd`;
- print "$jobname\n";
- $rst=`$jobname`;
- printf "$rst\n";
+ print "Current directory: ".getcwd()."\n";
+ print "Job file: $jobfile\n";
+ system("chmod a+x $jobfile") == 0 || die "Could not make $jobfile executable!\n";
+ $rst=`$jobfile`;
+ print "$rst\n";
}
}
else
@@ -831,7 +964,7 @@
{
# run pair ------------------------------------------->
printf "running pair now ................\n";
- print `./pair $plibdir/`;
+ print `$pkgdir/bin/pair99 $plibdir/`;
print "pair done\n";
`sync`; #flush filesystem buffers
sleep(1);
@@ -840,12 +973,11 @@
`cp -f pair.1 $datadir/pair1.dat`;
}

-
$totinit=0;
while($totinit<@TT)
{
$totinit=0;
- foreach $T(@TT)
+ foreach my $T(@TT)
{
if(-s "$datadir/init.$T")
{
@@ -855,10 +987,10 @@
last if($totinit>=@TT);

my $flag=0;
- if($runstyle ne "serial") #job running with qsub
+ if($runstyle ne "interactive") #job running in batch mode
{
- my $running=`qstat -f`;
- foreach $T(@TT)
+ my $running=`squeue -u $usrname --format="%24j"`;
+ foreach my $T(@TT)
{
my $tag="$T\_$seqname"; # unique name
if($running =~ /$tag/)
@@ -882,10 +1014,10 @@
}
}

-if($totinit<@TT/2)
+my $halfTT = @TT / 2;
+if ($totinit < $halfTT)
{
- printf "only $totinit threading programs have output, please check threading programs\n";
- goto ITpositionend;
+ die "Error: only $totinit threading programs have output. At least $halfTT are expected.\n";
}

@@ -894,13 +1026,31 @@

if($restraint1 || $restraint2 || $restraint3 || $restraint4)
{
+ my $atom_name;
+ my $LchQ;
+ my $LchT;
+ my $LchT2;
+ my $LchTT;
+ my $nT;
+ my $pdblen;
+ my $R2;
+ my $res_name;
+ my $res_no;
+ my %seqQ;
+ my $sequenceQ;
+ my $sequenceQQ;
+ my $sequenceT;
+ my $sequenceTT;
+ my $Temp;
+ my $Tname;
+
if(($restraint2 && $restraint3) || ($restraint2 && $restraint4) ||($restraint3 && $restraint4))
- {
+ {
print LOG1 "Multiple restraints of same type\n";
$errmessage="ERROR\nYou have specified a template protein to be used during the modeling in more than one format. Please specify a template protein in a single format and submit your sequence again.\n";
goto printerr;
}
- for($i=1;$i<=$Lch;$i++)
+ for(my $i=1;$i<=$Lch;$i++)
{
$a=substr($sequence,$i-1,1);
$seqQ{$i}=$a;
@@ -1026,7 +1176,9 @@

$R2 .="$line1\n";
$r22 .= "$line1";
- while($line1=)
+ my $ATOM;
+ my $atom;
+ while(my $line1=)
{
$R2 .="$line1";
$r22 .= "$line1";
@@ -1066,11 +1218,12 @@

if(($LchQ != $LchT)||($nT < $LchTT))
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
- printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
+ printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
+ $N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format of threading alignment\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1092,11 +1245,12 @@
$LchQ=length($sequenceQQ);
if(($LchQ != $Lch) || ($sequenceQQ ne $sequence))
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
- printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
+ printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
+ $N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format of threading alignment\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1126,10 +1280,10 @@
if($line1=~/Rank\s*:\s*\d+ Template name:\s*(\S+)/){$Tname=$1; $r22 .= "REMARK Template name:$Tname\n";}
if($line1=~/^ATOM/)
{
- $k=substr($line1,22,4); #order number of target residues
+ my $k=substr($line1,22,4); #order number of target residues
$k=~s/\s+//mg;
- $res=substr($line1,17,3);
- $aa=$ts{$res}; #real aa
+ my $res=substr($line1,17,3);
+ my $aa=$ts{$res}; #real aa
if($aa eq $seqQ{$k})
{
$R2 .= "$line1";
@@ -1137,12 +1291,12 @@
}
if($aa ne $seqQ{$k})
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format of PDB format threading alignment\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1167,7 +1321,8 @@
$R2 .="Specified template without alignment:";
if($restraint3=~/(\S+)\:(\S+)/)
{
- $pdbid=$1;$lcpdb=lc($pdbid);
+ $pdbid=$1;
+ $lcpdb=lc($pdbid);
$pdbchain=$2;
}
else
@@ -1186,12 +1341,12 @@
}
else
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format of PDBid:Chain\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1210,7 +1365,7 @@
if($restraint4)
{
$R2 .="Specified template without alignment: (Alignment will be generated using MUSTER threading program)";
- open(IN,"$datadir/$restraint4")||die "can not open $datadir/$restraint4";
+ open(IN,"$datadir/$restraint4") || die "Could not open $datadir/$restraint4 for reading!\n";
while($line1=<IN>)
{
$r44 .= $line1;
@@ -1229,12 +1384,12 @@
}
else
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "PDB file non-standard amino acid\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1254,12 +1409,12 @@
endmodel:;
if($LchT2 < 10)
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "PDB file Lch= $LchT2 very short\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1269,12 +1424,12 @@
}
if($LchT2 > 2000)
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "PDB file Lch= $LchT2 very long\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1285,38 +1440,39 @@
}
}

- printerr:;
- if($errmessage ne '')
+ printerr:;
+ if ($errmessage ne '')
{
- print "$errmessage\n";
- exit(-1);
+ die("$errmessage\n");
+ } else {
+ # No error was recorded: fall through (as the pre-patch code did) so the restraint files below are written.
}
}

if($restraint1)
{
- open(RE,">$datadir/restraint1.txt");
+ open(RE,">$datadir/restraint1.txt") || die "Could not create or open $datadir/restraint1.txt for writing!\n";
print RE "$r11";
close(RE);
`chmod g+rw $datadir/restraint1.txt`;
}
if($restraint2)
{
- open(RE,">$datadir/restraint2.txt");
+ open(RE,">$datadir/restraint2.txt") || die "Could not create or open $datadir/restraint2.txt for writing!\n";
print RE "$r22";
close(RE);
`chmod g+rw $datadir/restraint2.txt`;
}
if($restraint3)
{
- open(RE,">$datadir/restraint3.txt");
+ open(RE,">$datadir/restraint3.txt") || die "Could not create or open $datadir/restraint3.txt for writing!\n";
print RE "$r33";
close(RE);
`chmod g+rw $datadir/restraint3.txt`;
}
if($restraint4)
{
- open(RE,">$datadir/restraint4.txt");
+ open(RE,">$datadir/restraint4.txt") || die "Could not create or open $datadir/restraint4.txt for writing!\n";
print RE "$r44";
close(RE);
`chmod g+rw $datadir/restraint4.txt`;
@@ -1325,7 +1481,9 @@

######################## identity and homologous remove ###########################

-$sid_cut=$id_cut*100;
+my $sid_cut=$id_cut*100;
+my $R5;
+my $r51;
if($sid_cut||$homolog)
{
$errmessage='';
@@ -1344,12 +1502,12 @@
}
else
{
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format for specifying sequence identity cutoff\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1362,7 +1520,7 @@
if($homolog)
{
$R5 .="List of templates to be excluded & sequence identity cutoff(s):";
- open(IN,"$datadir/$homolog")||print "can not open $datadir/$homolog";
+ open(IN,"$datadir/$homolog") || die "Could not open $datadir/$homolog for reading!\n";
while($line1=<IN>)
{
$line1=~s/\%//mg;
@@ -1390,12 +1548,12 @@
{
$errmessage = "ERRORPlease specify PDB id, chain information & sequence identity cutoff (optional) in the format: PDBID:ChainID %Seq.Identity .If a cutoff is not specified, a default cutoff of 90% will be used to remove template proteins which are homologous to the specified template protein.Please read more about excluding templates during modeling here.\n";
goto printerr2;
- open(LOG1,">$log1file");
+ open(LOG1,">$log1file") || die "Could not create or open $log1file for writing!\n";
print LOG1 "\n";
printf LOG1 "%5d %6s->%20s(%4d) %25s %20s $time $sequence\n",
$N,'NA',substr($ID,0,20),$Lch,$useremail,$userip;
print LOG1 "Wrong format of PDBid:Chain\n";
- for($i=1;$i<=$n;$i++)
+ for(my $i=1;$i<=$n;$i++)
{
print LOG1 "$LINELOG1{$i}";
}
@@ -1414,22 +1572,22 @@
close IN;
}

-if($r51 ne "")
+if($r51 && $r51 ne "")
{
- print "exclude homologous templates...\n";
- open(RE,">$datadir/exclude.lst");
+ print "Excluding homologous templates ...\n";
+ open(RE,">$datadir/exclude.lst") || die "Could not create or open $datadir/exclude.lst for writing!\n";
print RE "$r51";
close(RE);
`chmod g+rw $datadir/exclude.lst`;
if(-s "$datadir/exclude.lst"){
foreach my $init(@TT){
$rst=`$pkgdir/bin/removehomologous.pl $pkgdir $libdir $datadir exclude.lst $init`;
- }
+ }
}
}

pos3_1:;
-printf "3.2 make restraints\n";
+printf "Step 3.2 of 7: Making restraints ...\n";
if(-s "$datadir/init.dat" && -s "$datadir/comb.dat" && -s "$datadir/par.dat" && -s "$datadir/combCA.dat"
&& -s "$datadir/comb8CA.dat" && -s "$datadir/distL.dat" && -s "$datadir/dist.dat")
{
@@ -1440,92 +1598,89 @@
if(!-s "$datadir/init.dat" || !-s "$datadir/comb.dat" || !-s "$datadir/par.dat" || !-s "$datadir/combCA.dat"
|| !-s "$datadir/comb8CA.dat" || !-s "$datadir/distL.dat" || !-s "$datadir/dist.dat")
{
- printf "missing some dat file after threading\n";
- goto ITpositionend;
+ die "Error: Some .dat files are missing. Perhaps threading failed.\n";
}

-
######################### end of generate restraint files #################################################
if((-s "$datadir/restraint1.txt")||(-s "$datadir/restraint2.txt") || (-s "$datadir/restraint3.txt")||(-s "$datadir/restraint4.txt"))
{
my $mod=`cat $pkgdir/I-TASSERmod/mMKRESmod`;
my $tag="mkres_$seqname"; # unique name
- my $jobname="$datadir/$tag";
+ my $jobfile="$datadir/$tag";
$mod=~s/\!TAG\!/$tag/mg;
$mod=~s/\!DATADIR\!/$datadir/mg;
$mod=~s/\!USER\!/$usrname/mg;
$mod=~s/\!LIB\!/$libdir/mg;
$mod=~s/\!PKGDIR\!/$pkgdir/mg;
- open(FH,">$jobname");
+ open(FH,">$jobfile") || die "Could not create or open $jobfile for writing!\n";
print FH "$mod\n";
close(FH);
- `chmod a+x $jobname`;
- `cp $jobname .`;
- print "restraint provided, run $jobname...\n";
- $rst=`$jobname`;
+ `chmod a+x $jobfile`;
+ `cp $jobfile .`;
+ print "restraint provided, run $jobfile...\n";
+ $rst=`$jobfile`;
print "$rst\n";
}

ITposition4:;
-printf "4.1 run simulation\n";
+printf "Step 4.1 of 7: Running simulations ...\n";

##check if all input files are correct
my $chkrst=`$pkgdir/I-TASSERmod/checkinput.pl $datadir`;
if($chkrst !~ /Congradulations/)
{
print "$chkrst\n";
-
- print "There are problems with your input files\n";
- print "Please check before simulation\n";
- exit;
+ die("There are problems with your input files. Please check before simulation.\n");
}

-%ncycle=(
+my %ncycle=(
'A'=>500,
'M'=>250,
);
-%switch=(
+my %switch=(
'A'=>1, #ab
'M'=>2, #rotation+translation
);

-@TTT=qw(
+my @TTT=qw(
A
M
);

-$init="init.dat";
-$comb="comb.dat";
-$dist="dist.dat";
-$combCA="combCA.dat";
-$distL="distL.dat";
-$par="par.dat";
-$comb8CA="comb8CA.dat";
-$exp="exp.dat";
-$pair1="pair1.dat";
-$pair3="pair3.dat";
-$svmseq="no";
-
-
-$nrun=1;
-$bindir="$pkgdir/I-TASSERmod";
-$commondir="$pkgdir/common";
+my $init="init.dat";
+my $comb="comb.dat";
+my $dist="dist.dat";
+my $combCA="combCA.dat";
+my $distL="distL.dat";
+my $par="par.dat";
+my $comb8CA="comb8CA.dat";
+my $exp="exp.dat";
+my $pair1="pair1.dat";
+my $pair3="pair3.dat";
+my $svmseq="no";
+
+my $nrun=1;
+my $bindir="$pkgdir/I-TASSERmod";
+my $commondir="$pkgdir/common";

-$mod=`cat $bindir/zysubmod`;
+my $mod=`cat $bindir/zysubmod`;
######## decide target type ###########
-open(init,"$datadir/init.dat");
-if(<init>=~/\S+\s+(\S+)\s+(\S+)/)
+my $type;
+open(INIT,"$datadir/init.dat") || die "Could not open $datadir/init.dat for reading!\n";
+if(<INIT>=~/\S+\s+(\S+)\s+(\S+)/)
{
$type=$1; #
- $n_good=$2; #number of good templates
+ #my $n_good=$2; #number of good templates
}
-close(init);
-open(rmsinp,"$datadir/rmsinp");
-<rmsinp>=~/\d+\s+(\d+)/;
+close(INIT);
+open(RMSINP,"$datadir/rmsinp") || die "Could not open $datadir/rmsinp for reading!\n";
+<RMSINP>=~/\d+\s+(\d+)/;
$Lch=$1;
-close(rmsinp);
+close(RMSINP);

######## decide number of runs ##################
+my %i1;
+my %i2;
$i1{"A"}=1;
$i1{"M"}=1;
if($type eq "easy")
@@ -1544,11 +1699,12 @@
$i2{"M"}=15;
}

-$totrunnum=0;
-$m_finish=0;
-foreach $T(@TTT)
+my $totrunnum=0;
+my $m_finish=0;
+my $out1;
+foreach my $T(@TTT)
{
- for($i=$i1{$T};$i<=$i2{$T};$i++)
+ for(my $i=$i1{$T};$i<=$i2{$T};$i++)
{
$totrunnum++;
if(-s "$datadir/out${i}$T")
@@ -1569,9 +1725,9 @@
}

$totrunnum=0;
-foreach $T(@TTT)
+foreach my $T(@TTT)
{
- for($i=$i1{$T};$i<=$i2{$T};$i++)
+ for(my $i=$i1{$T};$i<=$i2{$T};$i++)
{
if(-s "$datadir/out${i}$T")
{
@@ -1584,14 +1740,15 @@

$totrunnum++;
###
- $tag="$seqname\sim_$i$T";
+ my $tag="$seqname"."sim_$i$T";
$tagnames{$totrunnum}="$tag";
- $jobname="$datadir/$tag";
+ $jobfile="$datadir/$tag";
+ my $slurmfile="$jobfile.sbatch";
$errfile="$datadir/err_$tag";
$outfile="$datadir/out_$tag";
- $walltime="walltime=$hour:59:00";
+ $walltime="$hour:59:00";
###
- $mod1=$mod;
+ my $mod1=$mod;
$mod1=~s/\!ERRFILE\!/$errfile/mg;
$mod1=~s/\!OUTFILE\!/$outfile/mg;
$mod1=~s/\!WALLTIME\!/$walltime/mg;
@@ -1620,50 +1777,83 @@
$mod1=~s/\!SVMSEQ\!/$svmseq/mg;
$mod1=~s/\!USER\!/$usrname/mg;
$mod1=~s/\!BINDIR\!/$bindir/mg;
- open(job,">$jobname");
- print job "$mod1\n";
- close(job);
- `chmod a+x $jobname`;
+ open(JOB,">$jobfile") || die "Could not create or open $jobfile for writing!\n";
+ if ($runstyle eq "batch")
+ {
+ # Overwrite the working directory
+ $mod1 =~ s/work_dir="\/tmp.*$/work_dir="\$ENV{SCRATCH_DIR}";/mg;
+ open(SLURMFILE,">$slurmfile") || die "Could not create or open $slurmfile for writing!\n";
+ # Print some things to the job that have to be there
+ print SLURMFILE "#!/bin/bash\n";
+ print SLURMFILE "\n";
+ print SLURMFILE "#SBATCH -J $tag\n";
+ print SLURMFILE "#SBATCH -A $account\n";
+ print SLURMFILE "#SBATCH -t $walltime\n";
+ # In the original Perl, 1000 MB of memory are requested
+ print SLURMFILE "#SBATCH --mem-per-cpu=$sched_mem"."M\n";
+ print SLURMFILE "#SBATCH --workdir=$datadir\n";
+ print SLURMFILE "#SBATCH -o out_$tag\n";
+ print SLURMFILE "#SBATCH -e err_$tag\n";
+ print SLURMFILE "#SBATCH --mail-type=$when_to_notify\n";
+ print SLURMFILE "#SBATCH --mail-user=$notify_email\n";
+ print SLURMFILE "\n";
+ print SLURMFILE "echo \"Initial environment variables\"\n";
+ print SLURMFILE "echo \"-----------------------------\"\n";
+ print SLURMFILE "env | sort\n";
+ print SLURMFILE "/usr/bin/perl $jobfile\n";
+ print SLURMFILE "echo \"Final environment variables\"\n";
+ print SLURMFILE "echo \"---------------------------\"\n";
+ print SLURMFILE "env | sort\n";
+ close(SLURMFILE);
+ }
+ if ($DEBUG) {
+ $mod1 =~ s/work_dir=.*$/work_dir="$datadir\/tmp\/$tag";/mg;
+ }
+ print JOB "$mod1\n";
+ close(JOB);
}
}
printf "run $totrunnum $runstyle simulations\n";
-if($runstyle eq "parallel")
+if($runstyle eq "batch")
{
- for($i=2;$i<=$totrunnum;$i++)
+ for(my $i=1;$i<=$totrunnum;$i++)
{
- pos4_2:;
- $bsub=`qsub $datadir/$tagnames{$i}`;
- chomp($bsub);
- if(length $bsub ==0)
+ my $bsub=`sbatch $datadir/$tagnames{$i}.sbatch`;
+ if ($? != 0)
{
- sleep(20);
- goto pos4_2;
+ die "Job submission to Slurm failed!\n";
}
+ chomp($bsub);
+ print "$bsub\n";
+ sleep(20) if(length $bsub ==0);
}
- printf "run the first simulation job $tagnames{1}\n";
- `$datadir/$tagnames{1}`;
+ # Don't do this as it devours the working directory for
+ # the whole of I-TASSER!
+ #BPR printf "run the first simulation job $tagnames{1}\n";
+ #system("/usr/bin/perl $datadir/$tagnames{1}");
}
else
{
- for($i=1;$i<=$totrunnum;$i++)
+ system("chmod a+x $jobfile");
+ for(my $i=1;$i<=$totrunnum;$i++)
{
printf "run simulation job $i / $totrunnum\n";
- `$datadir/$tagnames{$i}`;
+ system("/usr/bin/perl $datadir/$tagnames{$i}");
}
}

-
-if($runstyle eq "serial")
+if($runstyle eq "interactive")
{
goto ITposition5;
}
-$niter=0;
+my $niter=0;
pos4_3:;
-printf "4.2 check finished simulations\n";
-$counter=`qstat |grep $usrname |grep $seqname\sim |wc -l`;
+printf "Step 4.2 of 7: Checking finished simulations ...\n";
+my $grepstring = $seqname."sim";
+my $counter=`squeue -o "%i %j %u" | grep $usrname | grep $grepstring | wc -l`;
sleep(1);
$counter=~s/\s//g;
-if(length $counter == 0) #qstat fail
+if(length $counter == 0) #squeue failed or returned no results
{
sleep(20);
goto pos4_3;
@@ -1677,21 +1867,21 @@
{
goto ITpositionend;
}
- goto pos4_3;
+ goto pos4_3;
}

-
-
ITposition5:;
-printf "5.1 do clustering\n";
+printf "Step 5.1 of 7: Performing clustering operations ...\n";
if(-s "$datadir/combo1.pdb" && -s "$datadir/closc1.pdb" && -s "$datadir/rst.dat" && -s "$datadir/str.txt" && -s "$datadir/cscore")
{
goto pos5_2;
}

-@tras=<$datadir/*tra*.bz2>;
-$n_tra=0;
-foreach $tra(@tras)
+my %traj;
+my @tras=<$datadir/*tra*.bz2>;
+my $n_tra=0;
+my $tra_name;
+foreach my $tra(@tras)
{
$tra=~/$datadir\/(\S+)/;
$tra_name=$1;
@@ -1707,13 +1897,13 @@
}
printf "No. of trajectory files: $n_tra\n";
goto ITpositionend if($n_tra <2); # without trajectories
-open(tra,">$tmpdir/tra.in.tmp");
-printf tra "$n_tra\n";
-for($k=1;$k<=$n_tra;$k++)
+open(TRA,">$tmpdir/tra.in.tmp") || die "Could not create or open $tmpdir/tra.in.tmp for writing!\n";
+printf TRA "$n_tra\n";
+for(my $k=1;$k<=$n_tra;$k++)
{
- printf tra "$traj{$k}\n";
+ printf TRA "$traj{$k}\n";
}
-close(tra);
+close(TRA);
`sort -d $tmpdir/tra.in.tmp > $datadir/tra.in`;

`cp $datadir/seq.dat .`;
@@ -1722,12 +1912,12 @@
`cp $pkgdir/I-TASSERmod/spicker45d ./spicker_$seqname`;
############# copy and unzip trajectories ######################

-open(tra,"tra.in");
-<tra>=~/(\d+)/;
+open(TRA,"tra.in") || die "Could not open tra.in for reading!\n";
+<TRA>=~/(\d+)/;
$n_tra=$1;
-for($i=1;$i<=$n_tra;$i++)
+for(my $i=1;$i<=$n_tra;$i++)
{
- $line=<tra>;
+ $line=<TRA>;
if($line=~/(\S+)/)
{
$traj{$i}=$1;
@@ -1742,9 +1932,10 @@
}
}
}
-close(tra);
-$n_tra_new=0;
-for($i=1;$i<=$n_tra;$i++)
+close(TRA);
+my %traj_new;
+my $n_tra_new=0;
+for(my $i=1;$i<=$n_tra;$i++)
{
if(-s "$traj{$i}")
{
@@ -1752,13 +1943,13 @@
$traj_new{$n_tra_new}=$traj{$i};
}
}
-open(train,">tra.in");
-print train "$n_tra_new 1 1\n";
-for($i=1;$i<=$n_tra_new;$i++)
+open(TRAIN,">tra.in") || die "Could not create or open tra.in for writing!\n";
+print TRAIN "$n_tra_new 1 1\n";
+for(my $i=1;$i<=$n_tra_new;$i++)
{
- print train "$traj_new{$i}\n";
+ print TRAIN "$traj_new{$i}\n";
}
-close(train);
+close(TRAIN);

############ run spicker #######################################
@@ -1771,25 +1962,26 @@
###############cscore###################################
$init="$datadir/init.dat";
$rst="$datadir/rst.dat";
-$tmp=`$pkgdir/I-TASSERmod/get_cscore.pl $init $rst`;
-open(cscore,">$datadir/cscore");
-print cscore "$tmp\n";
-close(cscore);
+my $tmp=`$pkgdir/I-TASSERmod/get_cscore.pl $init $rst`;
+open(CSCORE,">$datadir/cscore") || die "Could not create or open $datadir/cscore for writing!\n";
+print CSCORE "$tmp\n";
+close(CSCORE);
pos5_2:;
-printf "5.2 build full-atomic model\n";
+printf "Step 5.2 of 7: Building full-atomic model ...\n";
+my $nc;
####### nc5 ###############
-open(rst,"$datadir/rst.dat");
-while($line=<rst>)
+open(RST,"$datadir/rst.dat") || die "Could not open $datadir/rst.dat for reading!\n";
+while($line=<RST>)
{
if($line=~/Number of clusters:\s*(\d+)/)
{
$nc=$1;
}
}
-close(rst);
+close(RST);
$nc5=$nc if($nc5>$nc);
-$fullmodel=1;
-for($i=1;$i<=$nc;$i++)
+my $fullmodel=1;
+for(my $i=1;$i<=$nc;$i++)
{
if(-s "$datadir/closc$i\.pdb" && -s "$datadir/combo$i\.pdb")
{
@@ -1803,16 +1995,28 @@
{
goto ITpositionend;
}
-$refdir="$pkgdir/abs/mybin";
-for($i=1;$i<=$nc5;$i++)
+my $refdir="$pkgdir/abs/mybin";
+my $EM_command = "";
+my $mv_command = "";
+for(my $i=1;$i<=$nc5;$i++)
{
if(-s "$datadir/closc$i\.pdb" && -s "$datadir/combo$i\.pdb")
{
if(!-s "$datadir/emmcclosc$i\.pdb" || !-s "$datadir/model$i\.pdb")
{
- #print "$pkgdir/I-TASSERmod/EMrefinement.pl $datadir $refdir closc$i\.pdb combo$i\.pdb $seqname $usrname\n";
- `$pkgdir/I-TASSERmod/EMrefinement.pl $datadir $refdir closc$i\.pdb combo$i\.pdb $seqname $usrname`;
- `mv $datadir/hemmcclosc$i\.pdb $datadir/model$i\.pdb`;
+ $EM_command = "$pkgdir/I-TASSERmod/EMrefinement.pl";
+ $EM_command .= " $datadir";
+ $EM_command .= " $refdir";
+ $EM_command .= " closc$i\.pdb combo$i\.pdb";
+ $EM_command .= " $seqname";
+ $EM_command .= " $usrname";
+ print "$EM_command\n";
+ system("$EM_command") == 0 || die "Error: EMrefinement.pl failed!\n";
+
+ $mv_command = "mv";
+ $mv_command .= " $datadir/hemmcclosc$i\.pdb";
+ $mv_command .= " $datadir/model$i\.pdb";
+ system("$mv_command") == 0 || die "Error: Could not rename file $datadir/hemmcclosc$i\.pdb to $datadir/model$i\.pdb!\n";
`rm -f $datadir/emmc*`;
`rm -f $datadir/mc*`;
`rm -f $datadir/node_*`;
@@ -1820,10 +2024,8 @@
}
}

-
-printf "6 Estimate local accuracy of models and B-factor\n";
-`$pkgdir/bin/local/get_rsq_bfp.pl $pkgdir $libdir $datadir`;
-
+print "Step 6 of 7: Estimating local accuracy of models and B-factors ...\n";
+system("$pkgdir/bin/local/get_rsq_bfp.pl $pkgdir $libdir $datadir") == 0 || die "Error: get_rsq_bfp.pl failed!\n";

if($keeptraj ne "true")
{
@@ -1847,31 +2049,71 @@
}
if($function ne "")
{
- print "7 run COACH to predict function: $function...\n";
- print "$pkgdir/I-TASSERmod/runCOACH.pl -pkgdir $pkgdir -libdir $libdir -runstyle $runstyle -protname $seqname -model model1.pdb -datadir $datadir -homoflag $run -idcut $id_cut -LBS $lbs -EC $ecn -GO $got\n";
- `$pkgdir/I-TASSERmod/runCOACH.pl -pkgdir $pkgdir -libdir $libdir -runstyle $runstyle -protname $seqname -model model1.pdb -datadir $datadir -homoflag $run -idcut $id_cut -LBS $lbs -EC $ecn -GO $got >$datadir/out_COACH_model1`;
+ print "Step 7 of 7: Running COACH to predict function: $function ...\n";
+ my $coach_command = "$pkgdir/I-TASSERmod/runCOACH.pl";
+ $coach_command .= " -pkgdir $pkgdir";
+ $coach_command .= " -libdir $libdir";
+ $coach_command .= " -runstyle $runstyle";
+ $coach_command .= " -protname $seqname";
+ $coach_command .= " -model model1.pdb";
+ $coach_command .= " -datadir $datadir";
+ $coach_command .= " -homoflag $run";
+ $coach_command .= " -idcut $id_cut";
+ $coach_command .= " -LBS $lbs";
+ $coach_command .= " -EC $ecn";
+ $coach_command .= " -GO $got";
+ $coach_command .= " > $datadir/out_COACH_model1";
+ print "$coach_command\n";
+ system("$coach_command") == 0 || die "Error: COACH failed!\n";

`rm -f $datadir/model1/tmsite/tms_*`;
`rm -f $datadir/model1/cofactor/BSITE_model1/lig_*`;
}

ITpositionend:;
-`rm -fr $tmpdir`;
+#BPR `rm -fr $tmpdir`;
+print "I-TASSER finished.\n";
exit();

sub print_help
{
printf
"I-TASSER USAGE:
-=====================
-Mandatory arguments:
-=====================
+=================================
+Mandatory arguments in all cases:
+=================================
./runI-TASSER.pl -pkgdir package_dir -libdir lib_dir -java_home JAVA_HOME_path -seqname sequence_name -datadir data_dir
-==================
+=======================================================
+Mandatory arguments when using a batch queueing system:
+=======================================================
+ -schedmem amount of memory (MB) to request for each job
+ (note: more than 2.5 GB are required due to Java VMs)
+ May alternatively be set using the ITASSER_SCHEDMEM
+ environment variable.
+ -class class in which to schedule jobs
+ May alternatively be set using the ITASSER_SCHEDCLASS
+ environment variable.
+ -group group in which to schedule jobs
+ May alternatively be set using the ITASSER_SCHEDGROUP
+ environment variable.
+ -account account to which to charge jobs
+ May alternatively be set using the ITASSER_SCHEDACCT
+ environment variable.
+ -when-to-notify string specifying when to notify about job changes
+ (note: varies depending on the scheduler)
+ May alternatively be set using the ITASSER_NOTIFYWHEN
+ environment variable.
+ -email email address to which to send notifications
+ May alternatively be set using the ITASSER_NOTIFYEMAIL
+ environment variable.
+===================
Optional arguments:
-==================
- -runstyle default value is \"serial\" which means running I-TASSER simulation sequentially,
- \"parallel\" means running I-TASSER simulations in parallel, distributed on cluster nodes
+===================
+ -blastdir location of BLAST binaries. Defaults to $pkgdir/blast/bin.
+ -tmpdir location of temporary directory. Defaults to /tmp/\$usrname/IT\$seqname, but may be overridden and should be
+ in certain cases. Will change if \$SCRATCH_DIR is set in environment.
+ -runstyle default value is \"interactive\" which means running I-TASSER simulations in serial on the command line,
+ \"batch\" means running simulation jobs in the cluster through a batch queue system
-homoflag [real, benchmark],\"real\" will use all templates, \"benchmark\" will exclude homologous templates
-idcut sequence identity cutoff for \"benchmark\" runs, default value is 0.3, range is in [0,1]
-ntemp number of top templates output for each threading program, default is 20, range is in [1,50]
@@ -1936,4 +2178,3 @@
}
return ($dir, $fname);
}
-
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/runLOMETS.pl ./I-TASSERmod/runLOMETS.pl
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/runLOMETS.pl 2014-07-28 09:49:01.000000000 +1200
+++ ./I-TASSERmod/runLOMETS.pl 2014-08-14 12:30:47.369158538 +1200
@@ -3,6 +3,7 @@
use Getopt::Long;
use File::Path;
use File::Copy;
+use Cwd;

# add 3 new mod files in LOMETS version 2
#########################################################################check OS
@@ -30,7 +31,7 @@
printf "-idcut sequence identity cutoff when using benchmark, default value is 0.30, range is (0.00,1.00)\n";
printf "-ntemp number of top templetes selected by each threading program, default value is 20, range is [1,50]\n";
printf "-ntop number of top threading alignments by each threading program, default value is 10, range is [1,20]\n";
- printf "-runstyle [parallel or serial] run with command line directly (serial, default) or submit job with qsub (parallel)\n";
+ printf "-runstyle [parallel or serial] run with command line directly (serial, default) or submit job with sbatch (parallel)\n";
exit();
}

@@ -42,10 +43,10 @@
}
else
{
- if ( !( `which qstat` ) || !( `which qsub` ))
+ if ( system("which squeue") != 0 || system("which sbatch") != 0)
{
- print "\nYou can not set \"-runstyle =parallel\" because your system dose not support the commands: qsub and qstat.\n";
- print "To support qsub and qstat, you have to install the job management software PBS server.\n\n";
+ print "\nYou can not set \"-runstyle =parallel\" because your system dose not support the commands: squeue and sbatch.\n";
+ print "To support squeue and sbatch, you have to install the Slurm job scheduler.\n\n";
exit;
}
}
@@ -196,8 +197,8 @@

$time=`date`;
printf "starting time: $time";
-$pwd=`pwd`;
-printf "pwd: $pwd";
+$pwd=&getcwd();
+print "Working directory: $pwd\n";
$unicode="LM$seqname";#useful when making tmp dir and setting unique jobname
$tmpdir="/$tmptop/$usrname/$unicode";
if(-d "$tmpdir")
@@ -416,7 +417,7 @@
printf "start $runstyle threading $T\n";
if($runstyle eq "parallel")
{
- my $running=`qstat -f`;
+ my $running=`squeue --format="%24j"`;
if($running =~ /$tag/)
{
print "$tag is running, skip\n";
@@ -424,8 +425,9 @@
}
$errfile ="$datadir/err_$tag";
$outfile ="$datadir/out_$tag";
- $walltime ="walltime=72:00:00,mem=1000mb";
- $bsub =`qsub -e $errfile -o $outfile -l $walltime -N $tag $jobname`;
+ $walltime = "72:00:00"; # wall-clock limit handed to sbatch --time
+ $mem = "1000MB"; # memory request handed to sbatch --mem
+ $bsub =`sbatch -e $errfile -o $outfile --time=$walltime --mem=$mem -J $tag $jobname`; # submit the threading job under the unique name $tag
}
else
{
@@ -451,9 +453,9 @@
last if($totinit>=@TT);

my $flag=0;
- if($runstyle ne "serial") #job running with qsub
+ if($runstyle ne "serial") #job running with the queueing system
{
- my $running=`qstat -f`;
+ my $running=`squeue --format="%24j"`;
foreach $T(@TT)
{
my $tag="$T\_$seqname"; # unique name
diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wdPPASmod ./I-TASSERmod/wdPPASmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wdPPASmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/wdPPASmod 2014-08-14 17:21:21.473558229 +1200
@@ -156,7 +156,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++){
$a=substr($sequence,$i-1,1);
@@ -180,13 +180,13 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
#`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;

########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>){
if($line=~/Results from round\s+(\d+)/){
$ROUND=$1;
@@ -280,7 +280,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++){
printf freq "%3d $seqQ{$i} %3d",$i,$i;
@@ -307,7 +307,7 @@
`cp $pkgdir/bin/v4.pl .`;
`perl v4.pl $pkgdir $libdir`;

-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.mtx\n";
@@ -322,17 +322,18 @@
close(in);

`cp $data_dir/seq.dat .`;
-`cp $pkgdir/bin/zalign/fGGGd.jar .`;
+#`cp $pkgdir/bin/zalign/fGGGd.jar .`;

if(!-e "$java_exe/bin/java"){
print "JAVA_HOME setting is not correct\n";
exit(1);
}else{
- `$java_exe/bin/java -Xms2512m -Xmx2512m -jar fGGGd.jar > rst.dat`;
+ my $java_cmd = "$java_exe/bin/java -Xms2512m -Xmx2512m -jar $pkgdir/bin/zalign/fGGGd.jar > rst.dat";
+ system("$java_cmd") == 0 || die "An error occurred while running fGGGd.jar. Stop.\n";
}

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>){
if($line=~/(\d+)\s+(\S+)\s+(\S+)/){
@@ -356,17 +357,17 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";

$i_t=0;

-open(TOPA,">NNNd_top_alignments.txt")||die "can not open NNNd_top_alignments.txt for writing";
+open(TOPA,">NNNd_top_alignments.txt") || die "Could not create or open NNNd_top_alignments.txt for writing!\n";
for($i=1;$i<=$N_hit;$i++){
$template_name=$zscore_keys[$i-1];
$template_name=~s/\./\\\./mg; #useful for match
$zscore_value=$zscore{$zscore_keys[$i-1]};
######## read the alignment -------->
- open(align,"align.dat");
+ open(align,"align.dat") || die "Could not open align.dat for reading!\n";
while($line=<align>){
if($line=~/structureX:$template_name\s*\:/){
$sequenceT=""; #template sequence
@@ -422,7 +423,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>){
$ATOM=substr($line,0,4);
@@ -483,7 +484,7 @@

close TOPA; ### CLOSE recording top alignments file

-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -497,7 +498,7 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;

exit();

diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wMUSTERmod ./I-TASSERmod/wMUSTERmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wMUSTERmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/wMUSTERmod 2014-08-14 17:21:32.389935766 +1200
@@ -241,7 +241,7 @@
else
{
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
#`$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk -Q $pdb.pssm > blast.out`;

@@ -369,7 +369,7 @@

########### run psi-blast #######################
printf "running Psi-blast .....\n";
-`$blastdir/blastpgp -b 1000 -j 3 -h 1.0 -d $db -i protein.seq -C psitmp2.chk > blast2.out`;
+system("$blastdir/blastpgp -b 1000 -j 3 -h 1.0 -d $db -i protein.seq -C psitmp2.chk > blast2.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";

###############distant profile ##############
undef %am,%nA,%w,%ev_dim;
@@ -528,13 +528,12 @@
printf in "$libdir/stride/\n";
close(in);

-`cp $pkgdir/bin/zalign/fQQQ.jar .`;
-
if(!-e "$java_exe/bin/java"){
print "JAVA_HOME setting is not correct\n";
exit(1);
}else{
- `$java_exe/bin/java -Xms2512m -Xmx2512m -jar fQQQ.jar > rst.dat`;
+ my $java_cmd = "$java_exe/bin/java -Xms2512m -Xmx2512m -jar $pkgdir/bin/zalign/fQQQ.jar > rst.dat";
+ system("$java_cmd") == 0 || die "An error occurred while running fQQQ.jar. Stop.\n";
}

################ calculate Z-score ######################
@@ -777,7 +776,7 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;
exit();

diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wPPASmod ./I-TASSERmod/wPPASmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/wPPASmod 2014-07-28 09:49:02.000000000 +1200
+++ ./I-TASSERmod/wPPASmod 2014-08-14 17:21:40.203768682 +1200
@@ -1,5 +1,6 @@
#!/usr/bin/perl
use Math::Trig;
+use File::Copy;

########### setup the environment and Working DIRectory ###
$ENV{'PATH'}="/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/usr/pgi/linux86/bin";
@@ -138,8 +139,7 @@
`mkdir -p $work_dir`;
chdir "$work_dir";
`rm -f $work_dir/*`;
-`cp $pkgdir/bin/align ./align`;
-`cp $pkgdir/bin/zalign/zal2 ./zalign`;
+copy("$zalignbin/zal2", "./zalign") || die "Could not copy $zalignbin/zal2 to ./zalign!\n";

################ make fasta sequence file #################
@seqtxts=`cat $data_dir/seq.txt`;
@@ -152,7 +152,7 @@
pos6:;
}
$Lch=length $sequence;
-open(seq,">protein.seq");
+open(seq,">protein.seq") || die "Could not create or open protein.seq for writing!\n";
printf seq ">protein\n";
for($i=1;$i<=$Lch;$i++){
$a=substr($sequence,$i-1,1);
@@ -176,13 +176,13 @@
else
{
printf "running Psi-blast .....\n";
- `$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;
+ system("$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out") == 0 || die "An error occurred while running blastpgp. Stop.\n";
}
# `$blastdir/blastpgp -b 1000 -j 3 -h 0.001 -d $db -i protein.seq -C psitmp.chk > blast.out`;

########### extract 'pre.prf' ###################
#### record multiple sequence alignment $am{i_seq,i_pos} -------->
-open(blast,"blast.out");
+open(blast,"blast.out") || die "Could not open blast.out for reading!\n";
while($line=<blast>){
if($line=~/Results from round\s+(\d+)/){
$ROUND=$1;
@@ -276,7 +276,7 @@
}
#^^^^^^^^^ Henikoff frequence finished ^^^^^^^^^^^^^

-open(freq,">protein.prf");
+open(freq,">protein.prf") || die "Could not create or open protein.prf for writing!\n";
printf freq "$Lch\n";
for($i=1;$i<=$Lch;$i++){
printf freq "%3d $seqQ{$i} %3d",$i,$i;
@@ -298,11 +298,11 @@
`mv psitmp.mtx protein.mtx`;

########### run zalign #############
-`cp protein.seq seq.txt`;
-`cp $pkgdir/bin/v4.pl .`;
-`perl v4.pl $pkgdir $libdir`;
+copy("protein.seq", "seq.txt") || die "Could not copy protein.seq to seq.txt!\n";
+copy("$pkgdir/bin/v4.pl", "./v4.pl") || die "Could not copy v4.pl from $pkgdir/bin!\n";
+system("/usr/bin/perl v4.pl $pkgdir $libdir") == 0 || die "Execution of v4.pl failed!\n";

-open(in,">in.dd");
+open(in,">in.dd") || die "Could not create or open in.dd for writing!\n";
printf in "seq.dat\n";
printf in "protein.seq\n";
printf in "protein.prf\n";
@@ -312,19 +312,19 @@
printf in "profile.txt\n";
printf in "$libdir/dotProfiles\n";
close(in);
-`cp $data_dir/seq.dat .`;
+copy("$data_dir/seq.dat", "./seq.dat") || die "Could not copy seq.dat from $data_dir!\n";

-`cp $pkgdir/bin/zalign/fNNNd.jar .`;

if(!-e "$java_exe/bin/java"){
print "JAVA_HOME setting is not correct\n";
exit(1);
}else{
- `$java_exe/bin/java -Xms2512m -Xmx2512m -jar fNNNd.jar > rst.dat`;
+ my $java_cmd = "$java_exe/bin/java -Xms2512m -Xmx2512m -jar $zalignbin/fNNNd.jar > rst.dat";
+ system("$java_cmd") == 0 || die "An error occurred while running fNNNd.jar. Stop.\n";
}

################ calculate Z-score ######################
-open(out,"rst.dat");
+open(out,"rst.dat") || die "Could not open rst.dat for reading!\n";
$i=0;
while($line=<out>){
if($line=~/(\d+)\s+(\S+)\s+(\S+)/){
@@ -348,15 +348,15 @@
###########################################################
##### create template file 'init.dat' #####################
###########################################################
-open(init,">init.dat");
+open(init,">init.dat") || die "Could not create or open init.dat for writing!\n";
$i_t=0;
-open(TOPA,">NNNd_top_alignments.txt")||die "can not open NNNd_top_alignments.txt for writing";
+open(TOPA,">NNNd_top_alignments.txt") || die "Could not create or open NNNd_top_alignments.txt for writing!\n";
for($i=1;$i<=$N_hit;$i++){
$template_name=$zscore_keys[$i-1];
$template_name=~s/\./\\\./mg; #useful for match
$zscore_value=$zscore{$zscore_keys[$i-1]};
######## read the alignment -------->
- open(align,"align.dat");
+ open(align,"align.dat") || die "Could not open align.dat for reading!\n";
while($line=<align>){
if($line=~/structureX:$template_name\s*\:/){
$sequenceT=""; #template sequence
@@ -401,7 +401,7 @@
`cp $libdir/PDB/$template_name\.pdb ./temp.pdb`;
$idcut0=!ID_CUT!;
if($idcut0<0.999){
- $align_rst=`./align protein.seq temp.pdb 2`;
+ $align_rst=`$pkgdir/bin/align protein.seq temp.pdb 2`;
if($align_rst=~/Identical length\:\s+(\d+)/){
$id=$1/$Lch;
goto pos2 if($id>=$idcut0);
@@ -412,7 +412,7 @@
}
$i_t++;

- open(temppdb,"temp.pdb");
+ open(temppdb,"temp.pdb") || die "Could not open temp.pdb for reading!\n";
$n=0;
while($line=<temppdb>){
$ATOM=substr($line,0,4);
@@ -471,7 +471,7 @@
$time=`date`;
close(init);
close TOPA; ### CLOSE recording top alignments file
-open(init1,">init1.dat");
+open(init1,">init1.dat") || die "Could not create or open init1.dat for writing!\n";
printf init1 "%5d %5d (N_temp, Lch)\n",$i_t,$Lch;
close(init1);
`cat init.dat >> init1.dat`;
@@ -483,6 +483,6 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;
exit();

diff -ur /home/brob695/I-TASSER4.0.orig/I-TASSERmod/zysubmod ./I-TASSERmod/zysubmod
--- /home/brob695/I-TASSER4.0.orig/I-TASSERmod/zysubmod 2014-07-28 09:49:03.000000000 +1200
+++ ./I-TASSERmod/zysubmod 2014-08-21 12:38:43.639007000 +1200
@@ -1,7 +1,7 @@
#!/usr/bin/perl
-#PBS -e !ERRFILE!
-#PBS -o !OUTFILE!
-#PBS -l !WALLTIME!
+#SBATCH -e !ERRFILE!
+#SBATCH -o !OUTFILE!
+#SBATCH --time=!WALLTIME!

use Math::Trig;

@@ -143,6 +143,6 @@
`sync`;
`sync`;
sleep(1);
-`rm -fr $work_dir`;
+#SLURM `rm -fr $work_dir`;

exit();
diff -ur /home/brob695/I-TASSER4.0.orig/PSSpred/mPSSpred.pl ./PSSpred/mPSSpred.pl
--- /home/brob695/I-TASSER4.0.orig/PSSpred/mPSSpred.pl 2014-07-28 09:48:23.000000000 +1200
+++ ./PSSpred/mPSSpred.pl 2014-08-14 11:31:55.389310780 +1200
@@ -1,5 +1,8 @@
#!/usr/bin/perl
use Math::Trig;
+use Cwd;
+
+my $cwd = &getcwd;

################################################################
#
@@ -238,7 +241,7 @@
close(fasta);

########### run psi-blast #######################
-print `pwd`;
+print "$cwd\n";

if(!-s "pssm.txt" || !-s "blast.out" || !-s "psitmp.chk" || !-s "mtx")
{
@@ -571,7 +574,7 @@
#####
$nprog=7;
for($i=1;$i<=$nprog;$i++){
- printf "Running PSSpred\n";
+ printf "Running PSSpred in directory $cwd\n";
system("$PSSpreddir/PSSpred$i $PSSpreddir/wgt$i output$i.ss");
}

Only in .: submitI-TASSER4.0