cryoSPARC Cluster Integration Script Examples

Example cluster_info.json and cluster_script.sh files for several cluster workload managers (SLURM, PBS, and UGE).
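
Each pair of files defines one cluster lane. A minimal sketch of registering a lane, assuming both files have been saved to a directory such as /path/to/cluster_config (a hypothetical path) and that the cryosparcm CLI is on the PATH of the cryoSPARC master; cryosparcm cluster connect reads cluster_info.json and cluster_script.sh from the current working directory:

cd /path/to/cluster_config
cryosparcm cluster connect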

SLURM

Example A

cluster_info.json
{
    "qdel_cmd_tpl": "scancel {{ cluster_job_id }}",
    "worker_bin_path": "/home/cryosparcuser/cryosparc_worker/bin/cryosparcw",
    "title": "debug_cluster",
    "cache_path": "/ssd/tmp",
    "qinfo_cmd_tpl": "sinfo --format='%.8N %.6D %.10P %.6T %.14C %.5c %.6z %.7m %.7G %.9d %20E'",
    "qsub_cmd_tpl": "sbatch {{ script_path_abs }}",
    "qstat_cmd_tpl": "squeue -j {{ cluster_job_id }}",
    "cache_quota_mb": null,
    "send_cmd_tpl": "{{ command }}",
    "cache_reserve_mb": 10000,
    "name": "debug_cluster"
}
cluster_script.sh
#!/bin/bash
#SBATCH --job-name=cryosparc_{{ project_uid }}_{{ job_uid }}
#SBATCH --partition=debug
#SBATCH --output={{ job_log_path_abs }}
#SBATCH --error={{ job_log_path_abs }}
#SBATCH --nodes=1
#SBATCH --mem={{ (ram_gb*1000)|int }}M
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task={{ num_cpu }}
#SBATCH --gres=gpu:{{ num_gpu }}
#SBATCH --gres-flags=enforce-binding
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
srun {{ run_cmd }}
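The loop above probes a fixed range of device indices (0-15). A minimal alternative sketch, assuming nvidia-smi is available on the compute node: enumerate whichever GPU indices the node actually reports and keep only those with no running compute processes.

available_devs=""
for devidx in $(nvidia-smi --query-gpu=index --format=csv,noheader); do
    if [[ -z $(nvidia-smi -i "$devidx" --query-compute-apps=pid --format=csv,noheader) ]]; then
        # Append with a comma only when the list is already non-empty
        available_devs=${available_devs:+$available_devs,}$devidx
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs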

Example B

cluster_info.json
{
    "qdel_cmd_tpl": "scancel {{ cluster_job_id }}",
    "worker_bin_path": "/home/cryosparcuser/cryosparc_worker/bin/cryosparcw",
    "title": "test",
    "cache_path": "",
    "qinfo_cmd_tpl": "sinfo",
    "qsub_cmd_tpl": "sbatch {{ script_path_abs }}",
    "qstat_cmd_tpl": "squeue -j {{ cluster_job_id }}",
    "send_cmd_tpl": "{{ command }}",
    "name": "test"
}
cluster_script.sh
#!/bin/bash
#SBATCH --job-name=cryosparc_{{ project_uid }}_{{ job_uid }}
#SBATCH --output={{ job_log_path_abs }}
#SBATCH --error={{ job_log_path_abs }}
#SBATCH --ntasks={{ num_cpu }}
#SBATCH --mem={{ (ram_gb*1000)|int }}M
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:{{ num_gpu }}
#SBATCH --gres-flags=enforce-binding
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
srun {{ run_cmd }}

Example C

cluster_script.sh
#!/bin/bash
#SBATCH --partition=gpu
#SBATCH --nodes=1
#SBATCH --ntasks={{ num_cpu }}
#SBATCH --gres=gpu:{{ num_gpu }}
#SBATCH --time=48:00:00
#SBATCH --mem={{ (ram_gb)|int }}GB
#SBATCH --exclusive
#SBATCH --job-name cspark_{{ project_uid }}_{{ job_uid }}
#SBATCH --output={{ job_dir_abs }}/output.txt
#SBATCH --error={{ job_dir_abs }}/error.txt
{{ run_cmd }}
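For illustration, with hypothetical values num_cpu=4, num_gpu=1, ram_gb=24, project_uid=P12, job_uid=J34 and job_dir_abs=/data/projects/P12/J34, the template above would render roughly as follows (the final {{ run_cmd }} line is filled in by cryoSPARC with the complete job command at submission time and is omitted here):

#!/bin/bash
#SBATCH --partition=gpu
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:1
#SBATCH --time=48:00:00
#SBATCH --mem=24GB
#SBATCH --exclusive
#SBATCH --job-name cspark_P12_J34
#SBATCH --output=/data/projects/P12/J34/output.txt
#SBATCH --error=/data/projects/P12/J34/error.txt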

Example D

cluster_script.sh
#!/bin/bash
#SBATCH --job-name=cryosparc_{{ project_uid }}_{{ job_uid }}
#SBATCH --partition=q2
#SBATCH --output={{ job_log_path_abs }}
#SBATCH --error={{ job_log_path_abs }}
{%- if num_gpu == 0 %}
#SBATCH --ntasks={{ num_cpu }}
#SBATCH --cpus-per-task=1
#SBATCH --threads-per-core=1
{%- else %}
#SBATCH --nodes=1
#SBATCH --ntasks-per-node={{ num_cpu }}
#SBATCH --cpus-per-task=1
#SBATCH --threads-per-core=1
#SBATCH --gres=gpu:{{ num_gpu }}
#SBATCH --gres-flags=enforce-binding
{%- endif %}
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
{{ run_cmd }}
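The {%- if num_gpu == 0 %} block switches between CPU-only and GPU directives at render time. With hypothetical values num_cpu=8 and num_gpu=2, only the else branch survives rendering:

#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=1
#SBATCH --threads-per-core=1
#SBATCH --gres=gpu:2
#SBATCH --gres-flags=enforce-binding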

Example E

cluster_script.sh
{%- macro _min(a, b) -%}
{%- if a <= b %}{{a}}{% else %}{{b}}{% endif -%}
{%- endmacro -%}
#!/bin/bash
#SBATCH --job-name=cryosparc_{{ project_uid }}_{{ job_uid }}
#SBATCH --output={{ job_log_path_abs }}.out
#SBATCH --error={{ job_log_path_abs }}.err
#SBATCH --cpus-per-task=1
#SBATCH --threads-per-core=1
#SBATCH --partition=gpu
#SBATCH --exclusive
#SBATCH --mem=100000
{%- if num_gpu == 0 %}
# Use CPU cluster
#SBATCH --constraint=mc
#SBATCH --ntasks={{ num_cpu }}
{%- endif %}
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
{{ run_cmd }}

PBS

Example A

cluster_info.json
{
    "name" : "pbscluster",
    "worker_bin_path" : "/path/to/cryosparc_worker/bin/cryosparcw",
    "cache_path" : "/path/to/local/SSD/on/cluster/nodes",
    "send_cmd_tpl" : "ssh loginnode {{ command }}",
    "qsub_cmd_tpl" : "qsub {{ script_path_abs }}",
    "qstat_cmd_tpl" : "qstat -as {{ cluster_job_id }}",
    "qdel_cmd_tpl" : "qdel {{ cluster_job_id }}",
    "qinfo_cmd_tpl" : "qstat -q",
    "transfer_cmd_tpl" : "scp {{ src_path }} loginnode:{{ dest_path }}"
}
cluster_script.sh
#!/bin/bash
#### cryoSPARC cluster submission script template for PBS
## Available variables:
## {{ run_cmd }} - the complete command string to run the job
## {{ num_cpu }} - the number of CPUs needed
## {{ num_gpu }} - the number of GPUs needed.
## Note: the code will use this many GPUs starting from dev id 0;
## the cluster scheduler or this script has the responsibility
## of setting CUDA_VISIBLE_DEVICES so that the job code ends up
## using the correct cluster-allocated GPUs.
## {{ ram_gb }} - the amount of RAM needed in GB
## {{ job_dir_abs }} - absolute path to the job directory
## {{ project_dir_abs }} - absolute path to the project dir
## {{ job_log_path_abs }} - absolute path to the log file for the job
## {{ worker_bin_path }} - absolute path to the cryosparc worker command
## {{ run_args }} - arguments to be passed to cryosparcw run
## {{ project_uid }} - uid of the project
## {{ job_uid }} - uid of the job
## {{ job_creator }} - name of the user that created the job (may contain spaces)
## {{ cryosparc_username }} - cryosparc username of the user that created the job (usually an email)
##
## What follows is a simple PBS script:
#PBS -N cryosparc_{{ project_uid }}_{{ job_uid }}
#PBS -l select=1:ncpus={{ num_cpu }}:ngpus={{ num_gpu }}:mem={{ (ram_gb*1000)|int }}mb:gputype=P100
#PBS -o {{ job_dir_abs }}
#PBS -e {{ job_dir_abs }}
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
{{ run_cmd }}
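As a worked example of the {{ (ram_gb*1000)|int }} filter, hypothetical values num_cpu=6, num_gpu=2 and ram_gb=46 would render the resource request as (the gputype=P100 selector is a site-specific literal):

#PBS -l select=1:ncpus=6:ngpus=2:mem=46000mb:gputype=P100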

UGE

Example A

cluster_info.json
{
    "name" : "ugecluster",
    "worker_bin_path" : "/u/cryosparcuser/cryosparc/cryosparc_worker/bin/cryosparcw",
    "cache_path" : "/scratch/cryosparc_cache",
    "send_cmd_tpl" : "{{ command }}",
    "qsub_cmd_tpl" : "qsub {{ script_path_abs }}",
    "qstat_cmd_tpl" : "qstat -j {{ cluster_job_id }}",
    "qdel_cmd_tpl" : "qdel {{ cluster_job_id }}",
    "qinfo_cmd_tpl" : "qstat -q default.q",
    "transfer_cmd_tpl" : "scp {{ src_path }} uoft:{{ dest_path }}"
}
cluster_script.sh
#!/bin/bash
## What follows is a simple UGE script:
## Job Name
#$ -N cryosparc_{{ project_uid }}_{{ job_uid }}
## Number of CPUs (always request 1 slot and oversubscribe, since the GPU request is a per-core value)
##$ -pe smp {{ num_cpu }}
#$ -pe smp 1
## Memory per CPU core
#$ -l m_mem_free={{ (ram_gb)|int }}G
## Number of GPUs
#$ -l gpu_card={{ num_gpu }}
## Time limit 4 days
#$ -l h_rt=345600
## STDOUT/STDERR
#$ -o {{ job_dir_abs }}/uge.log
#$ -e {{ job_dir_abs }}/uge.log
#$ -j y
## Number of threads
export OMP_NUM_THREADS={{ num_cpu }}
echo "HOSTNAME: $HOSTNAME"
available_devs=""
for devidx in $(seq 0 15);
do
    if [[ -z $(nvidia-smi -i $devidx --query-compute-apps=pid --format=csv,noheader) ]] ; then
        if [[ -z "$available_devs" ]] ; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs
{{ run_cmd }}
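
To illustrate the oversubscription pattern noted above, hypothetical values num_cpu=4, num_gpu=1 and ram_gb=24 would render the resource requests and thread count as:

#$ -pe smp 1
#$ -l m_mem_free=24G
#$ -l gpu_card=1
export OMP_NUM_THREADS=4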