Commit 5ed2df07 authored by Christian Meesters

Merge branch 'master' into 'devel'

# Conflicts:
#   blast/parallel_BLAST/LA_Wrapper
parents 603612c7 15179dd6
@@ -16,7 +16,7 @@ modification, are permitted provided that the following conditions are met:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
+ARE DISCLAIMED. IN NO EVENT SHALL CHRISTIAN MEESTERS OR THE JGU BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
#!/bin/bash
#-----------------------------------------------------------------
# Example SLURM job script to run parallel STAR
#-----------------------------------------------------------------
#SBATCH -J star_multiNode # Job name
#SBATCH -o slurmOutput/out_multiNode.%j.out # Specify stdout output file (%j expands to jobId)
#SBATCH -p nodeshort # Queue name
#SBATCH -A zdvhpc # Specify allocation to charge against
#SBATCH --exclude=a[0068,0077,0078]
#SBATCH --ntasks=8 # Total number of tasks
#SBATCH --ntasks-per-node=8
#SBATCH -B 4:8:2 # Extra node resources: sockets:cores:threads per node
#SBATCH -t 04:59:59 # Run time (hh:mm:ss) - just under 5 hours
#SBATCH --cpus-per-task=8
#SBATCH --hint=multithread
#SBATCH --distribution=block:cyclic
#SBATCH --exclusive
#SBATCH --mem-per-cpu=1024M
#SBATCH --mem_bind=verbose
#SBATCH --verbose
#SBATCH --hint=compute_bound
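# Requested geometry: 8 tasks per node with 8 CPUs each (64 CPUs per node,
# matching -B 4:8:2 = 4 sockets x 8 cores x 2 threads) and 1024 MB per CPU,
# i.e. about 8 GB of memory per STAR task.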
###############################################################################
# use ramdisk (disabled; uncomment the SBATCH line and the commands below to stage the genome indices on a node-local ramdisk)
##SBATCH --gres=ramdisk:100G
#RAMDISK=$JOBDIR/ramdisk
#cp $1 $RAMDISK/indices.tar.gz
#cd $RAMDISK
#tar -zxf indices.tar.gz
###############################################################################
# load modules
module load bio/STAR/2.5.0a
module load tools/parallel/20170622
###############################################################################
# important paths and environment
MY_MEM_PER_TASK=$(( $SLURM_CPUS_PER_TASK * $SLURM_MEM_PER_CPU ))
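# SLURM_MEM_PER_CPU is given in MB, hence MY_MEM_PER_TASK is in MB as well;
# this matches the default unit of the --mem option passed to srun below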
host=$(hostname -s)
JOBDIR=/localscratch/$SLURM_JOB_ID
FS_DIR=/project/zdvhpc/tron_genfusion/star/star_mapping_output/2x600MB_MCF7_INTERN_MK47_CTTGTA_L002/multNodes/${SLURM_JOB_NUM_NODES}_Nodes/$SLURM_JOB_ID
ALIGN_FS_DIR=${FS_DIR}/alignments
LOG_FS_DIR=${FS_DIR}/logs
MY_TIME_START=$(date "+%Y%m%d-%H%M%S")
MY_TASK_FILE=command_list_${SLURM_JOB_ID}.sh
STAR_INPUT=/project/zdvhpc/tron_genfusion/star/star_input/2x600MB_MCF7_INTERN_MK47_CTTGTA_L002/${SLURM_NTASKS}_tasksPerFastq
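# expected input layout (inferred from the mapping loop below): one
# subdirectory per STAR task under $STAR_INPUT, each holding the left and
# right FASTQ file of one read pair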
###############################################################################
# working directory and .sh-file storing gnu parallel commands
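# NB: an already existing directory is treated as an error, so results of
# different jobs can never be mixed up or overwritten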
for dir in "$FS_DIR" "$LOG_FS_DIR" "$ALIGN_FS_DIR"; do
    if [[ ! -d "$dir" ]]; then
        echo "make directory $dir"
        mkdir -p "$dir"
    else
        echo "[ERROR] directory $dir already exists"
        exit 1
    fi
done
if [[ -f "$MY_TASK_FILE" ]]; then
    rm "$MY_TASK_FILE"
fi
###############################################################################
# read input from command line
if [[ $# -eq 0 ]]; then
    echo "[ERROR] no input parameters; a genome directory must be given"
    exit 1
else
    echo "[INFO] received input parameters"
    for i in "$@"; do
        echo "input_parameter: $i"
        case $i in
            -e=*|--extension=*)
                EXTENSION="${i#*=}"
                shift # past argument=value
                ;;
            -s=*|--searchpath=*)
                SEARCHPATH="${i#*=}"
                shift # past argument=value
                ;;
            -l=*|--lib=*)
                LIBPATH="${i#*=}"
                shift # past argument=value
                ;;
            --default)
                DEFAULT=YES
                shift # past argument with no value
                ;;
            *)
                # unknown option, presumably the genome directory; do not shift
                ;;
        esac
    done
    # after shifting away all option arguments, $1 is the genome directory
    MY_GENOME_DIR=$1
    # MY_READ_L=$2
    # MY_READ_R=$3
fi
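# Example invocation (script name and paths are illustrative only); any
# -e=/-s=/-l= options must precede the genome directory:
#   sbatch star_multiNode.sh /path/to/STAR/genomeDir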
###############################################################################
# print configuration
echo "[INFO] print environment"
printenv
echo "[INFO] running on $host"
echo "[INFO] assign $SLURM_JOB_NODELIST to current job $SLURM_JOB_ID"
echo "[INFO] assign $SLURM_CPUS_PER_TASK cpus per task.."
echo "[INFO] alloce ${MY_MEM_PER_TASK} memory per task"
echo "[INFO] output in job directory: $JOBDIR"
echo "[INFO] time run script start: " $MY_TIME_START
###############################################################################
# load genome into RAM and exit
srun --exclusive -N $SLURM_JOB_NUM_NODES -n $SLURM_JOB_NUM_NODES --ntasks-per-node=1 --cpus-per-task=1 --mem=$(($SLURM_CPUS_ON_NODE * $SLURM_MEM_PER_CPU)) --mem_bind=verbose --cpu_bind=verbose STAR --runThreadN 1 --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}- --genomeLoad LoadAndExit --genomeDir ${MY_GENOME_DIR}
sleep 2s
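# --genomeLoad LoadAndExit loads the genome index into shared memory once per
# node (one srun task per node above) and exits; the mapping tasks below then
# attach to this copy via LoadAndKeep instead of re-reading the index from disk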
###############################################################################
# write STAR commands to file and call them with gnu parallel
dir_counter=0
for directory in "$STAR_INPUT"/*; do
    echo "$directory"
    my_counter=0
    # the first FASTQ file (in sorted order) is taken as the left read,
    # the second as the right read of the pair
    for filename in $(find "${directory}" -maxdepth 1 -name '*.fastq' 2> /dev/null | sort); do
        my_counter=$(($my_counter + 1))
        if [[ $my_counter -lt 2 ]]; then
            MY_READ_L=$filename
        else
            MY_READ_R=$filename
        fi
        echo "$filename"
    done
    echo "$MY_READ_L"
    echo "$MY_READ_R"
    echo "srun -N1 -n1 --exclusive --cpus-per-task=$SLURM_CPUS_PER_TASK --mem_bind=verbose --mem=$MY_MEM_PER_TASK --cpu_bind=verbose STAR --runThreadN $SLURM_CPUS_PER_TASK --genomeLoad LoadAndKeep --genomeDir ${MY_GENOME_DIR} --readFilesIn ${MY_READ_L} ${MY_READ_R} --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}-$(( ($dir_counter%${SLURM_NTASKS_PER_NODE}) + 1 ))-" >> "$MY_TASK_FILE"
    dir_counter=$(($dir_counter + 1))
done
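# the prefix index $(( ($dir_counter % $SLURM_NTASKS_PER_NODE) + 1 )) cycles
# through 1..$SLURM_NTASKS_PER_NODE, so tasks that land on the same node
# write to distinct output prefixes in $JOBDIR (assuming one input directory
# per task)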
echo " [INFO] printing ${MY_TASK_FILE}"
cat $MY_TASK_FILE
star_parallel="parallel --delay .5 -j $SLURM_NTASKS -P $SLURM_NTASKS --joblog parallelMapping.log --workdir $PWD --verbose --progress"
$star_parallel < $MY_TASK_FILE
rm $MY_TASK_FILE
###############################################################################
# remove genome from RAM
srun --exclusive -N $SLURM_JOB_NUM_NODES -n $SLURM_JOB_NUM_NODES --ntasks-per-node=1 --cpus-per-task=1 --mem_bind=verbose --mem=$(($SLURM_CPUS_ON_NODE * $SLURM_MEM_PER_CPU)) --cpu_bind=verbose STAR --runThreadN 1 --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}- --genomeLoad Remove --genomeDir ${MY_GENOME_DIR}
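# --genomeLoad Remove detaches the shared-memory genome copy on every node,
# freeing its RAM before the results are copied back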
###############################################################################
# collect output data and copy to STAR-home
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Aligned.out.sam ${SLURM_JOB_ID}-Aligned.out.sam
for infix in $(seq 1 $SLURM_NTASKS_PER_NODE); do
sgather ${JOBDIR}/${SLURM_JOB_ID}-${infix}-Log.final.out ${LOG_FS_DIR}/${SLURM_JOB_ID}-${infix}-Log.final.out
sgather ${JOBDIR}/${SLURM_JOB_ID}-${infix}-Aligned.out.sam ${ALIGN_FS_DIR}/${SLURM_JOB_ID}-${infix}-Aligned.out.sam
done
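# sgather collects the given file from every node of the allocation and
# appends the source node's name to the destination file name, keeping the
# per-node copies distinct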
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.final.out ${FS_DIR}/${SLURM_JOB_ID}-Log.final.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.out ${SLURM_JOB_ID}-Log.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.progress.out ${SLURM_JOB_ID}-Log.progress.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-SJ.out.tab ${SLURM_JOB_ID}-SJ.out.tab
###############################################################################
# success!
echo "[INFO] time run script end: " $(date "+%Y%m%d-%H%M%S")
echo "[INFO] finished job!"
exit 0