Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
HPC - JGU - Life Sciences
seq-analysis
Commits
64b3185a
Commit
64b3185a
authored
Aug 28, 2019
by
Christian Meesters
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
so far so ungood
parent
0ad2386c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
44 deletions
+58
-44
blast/parallel_BLAST/LA_Wrapper
blast/parallel_BLAST/LA_Wrapper
+58
-44
No files found.
blast/parallel_BLAST/LA_Wrapper
View file @
64b3185a
...
@@ -43,7 +43,13 @@ module purge
...
@@ -43,7 +43,13 @@ module purge
# load the most current version of GNU parallel
# load the most current version of GNU parallel
module load tools/parallel
module load tools/parallel
module load lang/Python/3.6.4-foss-2018a
#module load lang/Python/3.6.4-foss-2018a
module load lang/Python/3.7.4-GCCcore-8.3.0
#TODO: find a solution for the bug in BLAST+ AND to select the version by hand
module load bio/BLAST+/2.9.0-gompi-2019a
#module load bio/BLAST+/2.7.1-foss-2018a
### setup variable for THIS script; giving absolute path if necessary
### setup variable for THIS script; giving absolute path if necessary
SCRIPT
=
"
$0
"
SCRIPT
=
"
$0
"
...
@@ -433,6 +439,16 @@ done
...
@@ -433,6 +439,16 @@ done
FASTA
=
$_arg_fasta
FASTA
=
$_arg_fasta
DATABASE
=
$_arg_database
DATABASE
=
$_arg_database
### checking db for integrity
if
[
-e
${
DATABASE
}*
.fa
]
;
then
DBSUFFIX
=
".fa"
elif
[
-e
${
DATABASE
}*
.fasta
]
;
then
DBSUFFIX
=
".fasta"
else
error
"no file '.fa' or '.fasta' found in
${
DATABASE
}
- unable to proceed reliably"
exit
1
fi
### check if query & database exist
### check if query & database exist
if
[[
$_arg_test
==
"off"
]]
&&
[
!
-e
"
$FASTA
"
]
;
then
if
[[
$_arg_test
==
"off"
]]
&&
[
!
-e
"
$FASTA
"
]
;
then
error
"FASTA input was: '
$FASTA
' - no such file!"
error
"FASTA input was: '
$FASTA
' - no such file!"
...
@@ -545,7 +561,7 @@ if [ -z "$SLURM_JOB_ID" ] && [[ $_arg_test == "off" ]]; then
...
@@ -545,7 +561,7 @@ if [ -z "$SLURM_JOB_ID" ] && [[ $_arg_test == "off" ]]; then
echo
"removing directory
$JOBTAG
"
echo
"removing directory
$JOBTAG
"
rm
-r
$JOBTAG
rm
-r
$JOBTAG
else
else
echo
"So you want to continue regardless
?
(
e.g. scratch files already existing)
([y]/n)"
echo
"So
,
you want to continue regardless (
using the existing scratch files)?
([y]/n)"
echo
-n
'>'
echo
-n
'>'
read
ENTER
read
ENTER
if
[[
${
ENTER
,,
}
=
'n'
||
${
ENTER
,,
}
==
'no'
]]
;
then
if
[[
${
ENTER
,,
}
=
'n'
||
${
ENTER
,,
}
==
'no'
]]
;
then
...
@@ -572,11 +588,13 @@ FASTA="$FASTAPATH/$FASTAID"
...
@@ -572,11 +588,13 @@ FASTA="$FASTAPATH/$FASTAID"
### setup blast and splitup executable; check if exist
### setup blast and splitup executable; check if exist
allowed_executables
=
"blastx blastp blastn"
allowed_executables
=
"blastx blastp blastn"
if
[[
!
$allowed_executables
=
~
(
^|[[:space:]]
)
"
$_arg_executable
"
(
$|
[[
:space:]]
)
]]
;
then
if
[[
!
$allowed_executables
=
~
(
^|
[[
:space:]]
)
"
$_arg_executable
"
(
$|
)
]]
;
then
#
BLASTEXE=$(which $_arg_executable)
BLASTEXE
=
$(
which
$_arg_executable
)
#
else
else
error
"
$_arg_executable
ought to be one of [
$allowed_executables
]"
error
"
$_arg_executable
ought to be one of [
$allowed_executables
]"
exit
1
fi
fi
export
_arg_executable
### which is the reference directory size?
### which is the reference directory size?
_arg_ramdisk
=
$(
du
-shL
--block-size
=
1M
"
$_arg_database
"
|
cut
-f1
)
M
_arg_ramdisk
=
$(
du
-shL
--block-size
=
1M
"
$_arg_database
"
|
cut
-f1
)
M
...
@@ -665,40 +683,30 @@ RAMDISK=$JOBDIR/ramdisk
...
@@ -665,40 +683,30 @@ RAMDISK=$JOBDIR/ramdisk
HOSTLIST
=
$(
scontrol show
hostname
$SLURM_JOB_NODELIST
|
paste
-d
,
-s
|
tr
','
,
' '
)
HOSTLIST
=
$(
scontrol show
hostname
$SLURM_JOB_NODELIST
|
paste
-d
,
-s
|
tr
','
,
' '
)
QUEUE
=
''
QUEUE
=
''
#myhost=$(hostname -f)
stagefile
=
/localscratch/
$SLURM_JOB_ID
/dummy_stagein.sh
rstagefile
=
/localscratch/
$SLURM_JOB_ID
/stagein.sh
source
"
${
SCRIPT_PATH
}
"
/stage_in.sh
stage_in_writer
chmod
+x
$stagefile
# distribute the stagewriter
sbcast
$stagefile
$rstagefile
rm
$stagefile
stagefile
=
$rstagefile
# we would not need this loop with regard to slurm, but as we have
# asynchronous tasks already, we keep track with the queue
for
HOST
in
$HOSTLIST
;
do
for
HOST
in
$HOSTLIST
;
do
if
[
-L
${
DATABASEPATH
}
]
;
then
srun
-w
$HOST
-N1
-n1
-c1
--mem-per-cpu
=
5000M
$stagefile
&
warning
"If the reference directory is a link, fast stage-in is not possible."
queue
$!
for
fname
in
${
DATABASEPATH
}
/
*
;
do
eval
"ssh
$HOST
cp -L
$fname
${
RAMDISK
}
/
$(
basename
$fname
)
"
&
PID
=
$!
queue
$PID
done
else
for
fname
in
${
DATABASEPATH
}
/
*
;
do
if
[
-L
"
$fname
"
]
;
then
eval
"ssh
$HOST
cp -L
$fname
${
RAMDISK
}
/
$(
basename
$fname
)
"
&
PID
=
$!
queue
$PID
else
eval
"ssh
$HOST
dd bs=4096 if=
$fname
of=
${
RAMDISK
}
/
$(
basename
$fname
)
"
&
PID
=
$!
queue
$PID
fi
done
fi
# TODO: check for dereferencing links, before enabling
# TODO: check for performance, before re-enabling
#sbcast $FILE $RAMDISK/$(basename $FILE)
done
done
#DATABASE=$RAMDISK/$DATABASE
DATABASE
=
$RAMDISK
#/$(basename $DATABASEPATH)
WORKDIR
=
$PWD
/
$BLASTDIR
/
$SLURM_JOB_NAME
WORKDIR
=
$PWD
/
$BLASTDIR
/
$SLURM_JOB_NAME
# this script may never output to a user's $HOME
# this script may never output to a user's $HOME
if
[[
*
"
$WORKDIR
"
*
=
'home'
]]
;
then
if
[[
*
"
$WORKDIR
"
*
=
'home'
]]
;
then
eror
"Cowardly refusing to operate in a home directory."
er
r
or
"Cowardly refusing to operate in a home directory."
fi
fi
# set path names to ease maintance
# set path names to ease maintance
SPLITFILEDIR
=
scratch
SPLITFILEDIR
=
scratch
...
@@ -709,9 +717,8 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then
...
@@ -709,9 +717,8 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then
mkdir
-p
"
$WORKDIR
/output"
||
exit
1
;
mkdir
-p
"
$WORKDIR
/output"
||
exit
1
;
cd
"
$WORKDIR
"
cd
"
$WORKDIR
"
echo
"executing scratch generator on
$FASTA
(
$_arg_splitup_per_queryfile
entries per file)"
echo
"executing scratch generator on
$FASTA
(
$_arg_splitup_per_queryfile
entries per file)"
eval
"
${
SCRIPT_PATH
}
/splitter.py
$FASTA
$_arg_splitup_per_queryfile
"
&
# splitup queryfile
"
${
SCRIPT_PATH
}
/splitter.py
$FASTA
$_arg_splitup_per_queryfile
"
&
# splitup queryfile
PID
=
$!
queue
$!
queue
$PID
fi
fi
# wait until the copy and a possible scratch generation are finished
# wait until the copy and a possible scratch generation are finished
...
@@ -720,6 +727,16 @@ while [[ ! -z "$(echo $QUEUE| tr -d ' ')" ]]; do
...
@@ -720,6 +727,16 @@ while [[ ! -z "$(echo $QUEUE| tr -d ' ')" ]]; do
sleep
5
sleep
5
done
done
set
-x
ls
$RAMDISK
/
*
DATABASE
=
$(
find
$RAMDISK
-name
"*
${
DBSUFFIX
}
"
-print
-quit
)
set
+x
#DATABASE=$RAMDISK/db${DBSUFFIX} #/$(basename $DATABASEPATH)
if
[[
-z
$DATABASE
]]
;
then
error
"Unable to recognize database, please get in touch with hpc@uni-mainz.de"
exit
1
fi
cd
"
$WORKDIR
"
cd
"
$WORKDIR
"
# calculating the degree of parallelism is necessary in order not to oversaturate with srun processes.
# calculating the degree of parallelism is necessary in order not to oversaturate with srun processes.
...
@@ -728,15 +745,6 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then
...
@@ -728,15 +745,6 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then
declare
-i
SLURM_CPUS_PER_TASK
=
1
declare
-i
SLURM_CPUS_PER_TASK
=
1
fi
fi
if
[[
-z
$DATABASE
]]
;
then
error
"Unable to recognize database, please get in touch with hpc@uni-mainz.de"
fi
# see whether we find a file in the db
tmp
=
$(
find
$DATABASE
-type
f
-print
-quit
)
# remove the 2nd suffix
DATABASE
=
${
tmp
%.*
}
### a temporary script to conduct the alignment
### a temporary script to conduct the alignment
cmdfile
=
/localscratch/
$SLURM_JOB_ID
/dummy.sh
cmdfile
=
/localscratch/
$SLURM_JOB_ID
/dummy.sh
cmdfilewriter
cmdfilewriter
...
@@ -749,6 +757,12 @@ sbcast $cmdfile $newcmd
...
@@ -749,6 +757,12 @@ sbcast $cmdfile $newcmd
rm
$cmdfile
rm
$cmdfile
cmdfile
=
$newcmd
cmdfile
=
$newcmd
echo
"command file:"
cat
$newcmd
echo
ls
/localscratch/
$SLURM_JOBID
/ramdisk
### append a finishing token to the samples
### append a finishing token to the samples
samples+
=(
'done'
)
samples+
=(
'done'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment