Commit 07152181 authored by Christian Meesters's avatar Christian Meesters

removed 'old' example

parent 1f517bdf
#!/bin/bash
#-----------------------------------------------------------------
# Example SLURM job script to run parallel STAR
#-----------------------------------------------------------------
#SBATCH -J star_multiNode # Job name
#SBATCH -o slurmOutput/out_multiNode.%j.out # Specify stdout output file (%j expands to jobId)
#SBATCH -p nodeshort # Partition (queue) name
#SBATCH -A zdvhpc # Specify allocation to charge against
#SBATCH --exclude=a[0068,0077,0078] # Do not schedule on these nodes
#SBATCH --ntasks=8 # Total number of tasks
#SBATCH --ntasks-per-node=8 # Tasks per node
#SBATCH -B 4:8:2 # Extra node info: 4 sockets, 8 cores/socket, 2 threads/core
#SBATCH -t 04:59:59 # Run time (hh:mm:ss) - just under 5 hours
#SBATCH --cpus-per-task=8 # CPUs (threads) per STAR task
#SBATCH --hint=multithread # Use extra (hyper-)threads
#SBATCH --distribution=block:cyclic # Tasks: block across nodes, cyclic across sockets
#SBATCH --exclusive # Do not share nodes with other jobs
#SBATCH --mem-per-cpu=1024M # Memory per allocated CPU
#SBATCH --mem_bind=verbose # Report memory binding
#SBATCH --verbose # Verbose sbatch output
#SBATCH --hint=compute_bound # NB: contradicts --hint=multithread above; only one hint can take effect
###############################################################################
# use ramdisk
##SBATCH --gres=ramdisk:100G
#RAMDISK=$JOBDIR/ramdisk
#cp $1 $RAMDISK/indices.tar.gz
#cd $RAMDISK
#tar -zxf indices.tar.gz
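# (note: the '##SBATCH' line above is ignored by sbatch; if this block were
#  enabled, the genome directory passed below would presumably point at the
#  indices extracted into $RAMDISK)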
###############################################################################
# load modules
module load bio/STAR/2.5.0a
module load tools/parallel/20170622
###############################################################################
# important paths and environment
MY_MEM_PER_TASK=$(( $SLURM_CPUS_PER_TASK * $SLURM_MEM_PER_CPU ))
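# e.g. with --cpus-per-task=8 and --mem-per-cpu=1024M as requested above,
# this yields 8 * 1024 = 8192 (MB per task)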
host=$(hostname -s)
JOBDIR=/localscratch/$SLURM_JOB_ID
FS_DIR=/project/zdvhpc/tron_genfusion/star/star_mapping_output/2x600MB_MCF7_INTERN_MK47_CTTGTA_L002/multNodes/${SLURM_JOB_NUM_NODES}_Nodes/$SLURM_JOB_ID
ALIGN_FS_DIR=$FS_DIR/alignments
LOG_FS_DIR=$FS_DIR/logs
MY_TIME_START=$(date "+%Y%m%d-%H%M%S")
MY_TASK_FILE=command_list_${SLURM_JOB_ID}.sh
STAR_INPUT=/project/zdvhpc/tron_genfusion/star/star_input/2x600MB_MCF7_INTERN_MK47_CTTGTA_L002/${SLURM_NTASKS}_tasksPerFastq
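# STAR_INPUT is expected to contain one subdirectory per task, each holding a
# pair of FASTQ files (left/right reads) -- see the mapping loop below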
###############################################################################
# working directory and .sh-file storing gnu parallel commands
if [[ ! -d "$FS_DIR" ]]; then
echo "make directory $FS_DIR"
mkdir $FS_DIR
else
echo "[ERROR] could not make output directory"
exit 1
fi
if [[ ! -d "$LOG_FS_DIR" ]]; then
echo "make directory $LOG_FS_DIR"
mkdir $LOG_FS_DIR
else
echo "[ERROR] could not make log directory"
exit 1
fi
if [[ ! -d "$ALIGN_FS_DIR" ]]; then
echo "make directory $ALIGN_FS_DIR"
mkdir $ALIGN_FS_DIR
else
echo "[ERROR] could not make align directory"
exit 1
fi
if [[ -f "$MY_TASK_FILE" ]]; then
rm $MY_TASK_FILE
fi
###############################################################################
# read input from command line
if [ $# -eq 0 ]
then
    echo "[ERROR] no input parameters, the genome directory is required" >&2
    exit 1
else
    echo "[INFO] received input parameters"
for i in "$@"
do
echo "input_parameter: $i"
case $i in
-e=*|--extension=*)
EXTENSION="${i#*=}"
shift # past argument=value
;;
-s=*|--searchpath=*)
SEARCHPATH="${i#*=}"
shift # past argument=value
;;
-l=*|--lib=*)
LIBPATH="${i#*=}"
shift # past argument=value
;;
--default)
DEFAULT=YES
shift # past argument with no value
;;
*)
# unknown option
;;
esac
done
    # note: the shifts above consume leading arguments, so the genome
    # directory has to be passed after all options
    MY_GENOME_DIR=$1
    # MY_READ_L=$2
    # MY_READ_R=$3
fi
###############################################################################
# print configuration
echo "[INFO] print environment"
printenv
echo "[INFO] running on $host"
echo "[INFO] assign $SLURM_JOB_NODELIST to current job $SLURM_JOB_ID"
echo "[INFO] assign $SLURM_CPUS_PER_TASK cpus per task.."
echo "[INFO] alloce ${MY_MEM_PER_TASK} memory per task"
echo "[INFO] output in job directory: $JOBDIR"
echo "[INFO] time run script start: " $MY_TIME_START
###############################################################################
# load genome into RAM and exit
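# one task per node: STAR's '--genomeLoad LoadAndExit' loads the genome index
# into shared memory on each node and exits; the 'LoadAndKeep' mapping runs
# below then attach to that resident copy instead of loading their own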
srun --exclusive -N $SLURM_JOB_NUM_NODES -n $SLURM_JOB_NUM_NODES \
     --ntasks-per-node=1 --cpus-per-task=1 \
     --mem=$(($SLURM_CPUS_ON_NODE * $SLURM_MEM_PER_CPU)) \
     --mem_bind=verbose --cpu_bind=verbose \
     STAR --runThreadN 1 --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}- \
          --genomeLoad LoadAndExit --genomeDir ${MY_GENOME_DIR}
sleep 2s
###############################################################################
# write STAR commands to file and call them with gnu parallel
dir_counter=0
for directory in $STAR_INPUT/*; do
echo "$directory"
    my_counter=0
    # pick up the paired-end reads: the first *.fastq file (in sorted order)
    # is taken as the left read, the second as the right read
    for filename in $(find ${directory} -maxdepth 1 -name '*.fastq' | sort); do
        my_counter=$(($my_counter + 1))
        if [ $my_counter -lt 2 ]; then
            MY_READ_L=$filename
        else
            MY_READ_R=$filename
        fi
        echo $filename
    done
echo $MY_READ_L
echo $MY_READ_R
echo "srun -N1 -n1 --exclusive --cpus-per-task=$SLURM_CPUS_PER_TASK --mem_bind=verbose --mem=$MY_MEM_PER_TASK --cpu_bind=verbose STAR --runThreadN $SLURM_CPUS_PER_TASK --genomeLoad LoadAndKeep --genomeDir ${MY_GENOME_DIR} --readFilesIn ${MY_READ_L} ${MY_READ_R} --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}-$(( ($dir_counter%${SLURM_NTASKS_PER_NODE}) + 1 ))-" >> $MY_TASK_FILE
dir_counter=$(($dir_counter + 1))
done
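# a generated entry in $MY_TASK_FILE looks roughly like this (hypothetical
# job id 4242, first input directory, 8 CPUs per task; shown wrapped for
# readability -- the file holds one line per command):
#   srun -N1 -n1 --exclusive --cpus-per-task=8 --mem_bind=verbose --mem=8192 \
#     --cpu_bind=verbose STAR --runThreadN 8 --genomeLoad LoadAndKeep \
#     --genomeDir <genome dir> --readFilesIn <R1.fastq> <R2.fastq> \
#     --outFileNamePrefix /localscratch/4242/4242-1-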
echo " [INFO] printing ${MY_TASK_FILE}"
cat $MY_TASK_FILE
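# GNU parallel keeps $SLURM_NTASKS job slots busy; each slot runs one srun
# command from the task file, and srun places it on free resources within the
# job's allocation (-j and -P are synonyms in GNU parallel, so the slot count
# is given only once)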
star_parallel="parallel --delay .5 -j $SLURM_NTASKS --joblog parallelMapping.log --workdir $PWD --verbose --progress"
$star_parallel < $MY_TASK_FILE
rm $MY_TASK_FILE
###############################################################################
# remove genome from RAM
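# again one task per node: '--genomeLoad Remove' detaches and removes the
# shared-memory copy of the genome on each node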
srun --exclusive -N $SLURM_JOB_NUM_NODES -n $SLURM_JOB_NUM_NODES \
     --ntasks-per-node=1 --cpus-per-task=1 \
     --mem=$(($SLURM_CPUS_ON_NODE * $SLURM_MEM_PER_CPU)) \
     --mem_bind=verbose --cpu_bind=verbose \
     STAR --runThreadN 1 --outFileNamePrefix ${JOBDIR}/${SLURM_JOB_ID}- \
          --genomeLoad Remove --genomeDir ${MY_GENOME_DIR}
###############################################################################
# collect output data and copy to STAR-home
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Aligned.out.sam ${SLURM_JOB_ID}-Aligned.out.sam
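# sgather copies the named file from the local scratch of every node in the
# allocation back to the shared filesystem; it appends the source node's name
# to each destination file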
for infix in $(seq 1 $SLURM_NTASKS_PER_NODE); do
sgather ${JOBDIR}/${SLURM_JOB_ID}-${infix}-Log.final.out ${LOG_FS_DIR}/${SLURM_JOB_ID}-${infix}-Log.final.out
sgather ${JOBDIR}/${SLURM_JOB_ID}-${infix}-Aligned.out.sam ${ALIGN_FS_DIR}/${SLURM_JOB_ID}-${infix}-Aligned.out.sam
done
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.final.out ${FS_DIR}/${SLURM_JOB_ID}-Log.final.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.out ${SLURM_JOB_ID}-Log.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-Log.progress.out ${SLURM_JOB_ID}-Log.progress.out
#sgather ${JOBDIR}/${SLURM_JOB_ID}-SJ.out.tab ${SLURM_JOB_ID}-SJ.out.tab
###############################################################################
# success!
echo "[INFO] time run script end: " $(date "+%Y%m%d-%H%M%S")
echo "[INFO] finished job!"
exit 0