Source code for mushroom_rl_benchmark.experiment.slurm.slurm_script

from pathlib import Path


def create_slurm_script(slurm_path, slurm_script_name='slurm.sh',
                        **slurm_params):
    """
    Function to create a slurm script in a specific directory.

    Args:
        slurm_path (str): path to locate the slurm script;
        slurm_script_name (str, 'slurm.sh'): name of the slurm script;
        **slurm_params: parameters for generating the slurm file content.

    Returns:
        The path to the slurm script.

    """
    code = generate_slurm(**slurm_params)

    # Create the target directory if it does not exist yet.
    slurm_path = Path(slurm_path)
    slurm_path.mkdir(exist_ok=True)
    slurm_path = slurm_path / slurm_script_name

    with open(slurm_path, "w") as file:
        file.write(code)

    return slurm_path
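A minimal usage sketch (the directory, experiment name, and entry-point script below are hypothetical, not part of the module):

    script_path = create_slurm_script(
        slurm_path='./logs/slurm',          # hypothetical output directory
        exp_name='ppo_hopper',              # hypothetical experiment name
        exp_dir_slurm='./logs/slurm',
        python_file='run_experiment.py',    # hypothetical entry point
        n_exp=25,
        max_concurrent_runs=5,
        memory=4000,
        hours=12)
    # script_path now points to ./logs/slurm/slurm.sh, ready for `sbatch`.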
def generate_slurm(exp_name, exp_dir_slurm, python_file, gres=None,
                   project_name=None, partition=None, n_exp=1,
                   max_concurrent_runs=None, memory=2000, hours=24, minutes=0,
                   seconds=0):
    """
    Function to generate the slurm file content.

    Args:
        exp_name (str): name of the experiment;
        exp_dir_slurm (str): directory where the slurm log files are located;
        python_file (str): path to the python file that should be executed;
        gres (str, None): request cluster resources, e.g. to add a GPU in the
            IAS cluster specify gres='gpu:rtx2080:1';
        project_name (str, None): name of the slurm project;
        partition (str, None): name of the partition to be used;
        n_exp (int, 1): number of experiments in the slurm array;
        max_concurrent_runs (int, None): maximum number of runs that should be
            executed in parallel on the SLURM cluster;
        memory (int, 2000): memory limit in megabytes (MB) for the slurm jobs;
        hours (int, 24): maximum number of execution hours for the slurm jobs;
        minutes (int, 0): maximum number of execution minutes for the slurm
            jobs;
        seconds (int, 0): maximum number of execution seconds for the slurm
            jobs.

    Returns:
        The slurm script as a string.

    """
    duration = to_duration(hours, minutes, seconds)

    project_name_option = ''
    partition_option = ''
    gres_option = ''
    job_array_option = ''

    if project_name:
        project_name_option = f'#SBATCH -A {project_name}\n'
    if partition:
        partition_option = f'#SBATCH -p {partition}\n'
    if gres:
        gres_option = f'#SBATCH --gres={gres}\n'
    if n_exp > 1:
        # Use a job array with one task per experiment, optionally throttled
        # to max_concurrent_runs simultaneous tasks; each task gets its own
        # seed and log files.
        job_array_option += '#SBATCH -a 0-' + str(n_exp - 1) + (
            '%{}'.format(max_concurrent_runs)
            if max_concurrent_runs is not None else '') + '\n'
        text_output_file = '#SBATCH -o ' + exp_dir_slurm + '/%A_%a.out\n'
        text_output_file += '#SBATCH -e ' + exp_dir_slurm + '/%A_%a.err\n'
        seed_specification = '--seed $SLURM_ARRAY_TASK_ID'
    else:
        text_output_file = '#SBATCH -o ' + exp_dir_slurm + '/%A.out\n'
        text_output_file += '#SBATCH -e ' + exp_dir_slurm + '/%A.err\n'
        seed_specification = '--seed 0'

    code = f"""\
#!/usr/bin/env bash

###############################################################################
# SLURM Configurations

# Optional parameters
{project_name_option}{partition_option}{gres_option}
# Mandatory parameters
#SBATCH -J {exp_name}
{job_array_option}#SBATCH -t {duration}
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --mem-per-cpu={memory}
{text_output_file}
###############################################################################
# Your PROGRAM call starts here
echo "Starting Job $SLURM_JOB_ID, Index $SLURM_ARRAY_TASK_ID"

# Program specific arguments

CMD="python3 {python_file} ${{@:1}} {seed_specification}"
echo "$CMD"
eval $CMD
"""

    return code
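Continuing the hypothetical parameters from the sketch above, the generated script can be inspected directly; the commented lines show what this call emits:

    script = generate_slurm(exp_name='ppo_hopper',
                            exp_dir_slurm='./logs/slurm',
                            python_file='run_experiment.py', n_exp=25,
                            max_concurrent_runs=5, memory=4000, hours=12)
    # Since n_exp > 1, the header contains, among other lines:
    #   #SBATCH -J ppo_hopper
    #   #SBATCH -a 0-24%5
    #   #SBATCH -t 12:00:00
    #   #SBATCH --mem-per-cpu=4000
    #   #SBATCH -o ./logs/slurm/%A_%a.out
    #   #SBATCH -e ./logs/slurm/%A_%a.err
    # and each array task runs the python file with --seed $SLURM_ARRAY_TASK_ID.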
def to_duration(hours, minutes, seconds):
    # Zero-pad each component to two digits to build the HH:MM:SS time limit
    # expected by slurm's -t option.
    h = "0" + str(hours) if hours < 10 else str(hours)
    m = "0" + str(minutes) if minutes < 10 else str(minutes)
    s = "0" + str(seconds) if seconds < 10 else str(seconds)

    return h + ":" + m + ":" + s
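For example, each field is zero-padded to two digits:

    to_duration(24, 0, 0)   # -> '24:00:00'
    to_duration(1, 5, 30)   # -> '01:05:30'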