Source code for mushroom_rl_benchmark.experiment.slurm.slurm_script

from pathlib import Path


def create_slurm_script(slurm_path, slurm_script_name='slurm.sh', **slurm_params):
    """
    Function to create a slurm script in a specific directory.

    Args:
        slurm_path (str): path to the directory in which the slurm script is
            placed; the directory (including missing parents) is created if
            it does not exist;
        slurm_script_name (str, slurm.sh): name of the slurm script;
        **slurm_params: parameters for generating the slurm file content,
            forwarded unchanged to ``generate_slurm``.

    Returns:
        The path to the slurm script.

    """
    # Generate the content first, so that invalid parameters do not leave an
    # empty directory or a truncated script behind.
    code = generate_slurm(**slurm_params)

    script_dir = Path(slurm_path)
    # parents=True: experiment directories are usually nested, and the
    # intermediate levels may not exist yet.
    script_dir.mkdir(parents=True, exist_ok=True)

    script_file = script_dir / slurm_script_name
    with open(script_file, "w") as file:
        file.write(code)

    return script_file
def generate_slurm(exp_name, exp_dir_slurm, python_file, gres=None, project_name=None, n_exp=1,
                   max_concurrent_runs=None, memory=2000, hours=24, minutes=0, seconds=0):
    """
    Function to generate the slurm file content.

    Args:
        exp_name (str): name of the experiment;
        exp_dir_slurm (str): directory where the slurm log files are located
            (path-like objects are accepted as well);
        python_file (str): path to the python file that should be executed
            (path-like objects are accepted as well);
        gres (str, None): request cluster resources. E.g. to add a GPU in the
            IAS cluster specify gres='gpu:rtx2080:1';
        project_name (str, None): name of the slurm project;
        n_exp (int, 1): number of experiments in the slurm array;
        max_concurrent_runs (int, None): maximum number of runs that should be
            executed in parallel on the SLURM cluster;
        memory (int, 2000): memory limit in mega bytes (MB) for the slurm jobs;
        hours (int, 24): maximum number of execution hours for the slurm jobs;
        minutes (int, 0): maximum number of execution minutes for the slurm
            jobs;
        seconds (int, 0): maximum number of execution seconds for the slurm
            jobs.

    Returns:
        The slurm script as string.

    """
    # Paths are frequently handled as pathlib.Path objects by callers; convert
    # them once so the string concatenations below cannot raise a TypeError.
    exp_dir_slurm = str(exp_dir_slurm)
    python_file = str(python_file)

    duration = to_duration(hours, minutes, seconds)
    separator = '###############################################################################'

    # A job array is only requested when there is more than one experiment.
    is_array = n_exp > 1

    lines = [
        '#!/usr/bin/env bash',
        '',
        separator,
        '# SLURM Configurations',
    ]

    if project_name:
        lines.append('#SBATCH -A ' + project_name)
    lines.append('#SBATCH -J ' + exp_name)

    if is_array:
        array_spec = '0-' + str(n_exp - 1)
        if max_concurrent_runs is not None:
            # "%N" appended to the array range limits the simultaneous tasks.
            array_spec += '%{}'.format(max_concurrent_runs)
        lines.append('#SBATCH -a ' + array_spec)

    lines.append('#SBATCH -t ' + duration)
    lines.append('#SBATCH -n 1')
    lines.append('#SBATCH -c 1')
    lines.append('#SBATCH --mem-per-cpu=' + str(memory))

    if gres:
        lines.append('#SBATCH --gres=' + str(gres))

    # %A is the job id and %a the array task index; the latter is only used
    # in the log file names of array jobs.
    log_stem = '/%A_%a' if is_array else '/%A'
    lines.append('#SBATCH -o ' + exp_dir_slurm + log_stem + '.out')
    lines.append('#SBATCH -e ' + exp_dir_slurm + log_stem + '.err')

    lines += [
        '',
        separator,
        '# Your PROGRAM call starts here',
        'echo "Starting Job $SLURM_JOB_ID, Index $SLURM_ARRAY_TASK_ID"',
        '',
        '# Program specific arguments',
    ]

    # Each array task uses its index as seed; a single job always uses seed 0.
    seed = '$SLURM_ARRAY_TASK_ID' if is_array else '0'

    # The command is assembled as one bash string spanning three lines
    # (trailing backslashes are bash line continuations). "${@:1}" forwards
    # every argument given to the slurm script on to the python file.
    lines += [
        'CMD="python3 ' + python_file + ' \\',
        '\t\t${@:1}\\',
        '\t\t--seed ' + seed + '"',
        '',
        'echo "$CMD"',
        'eval $CMD',
    ]

    return '\n'.join(lines) + '\n'
def to_duration(hours, minutes, seconds):
    """
    Function to format a time limit as an ``hours:minutes:seconds`` string.

    The three components are summed up and normalized, so that values
    exceeding their usual range are carried over to the next unit (e.g. 90
    minutes become ``01:30:00``). Every field is zero-padded to at least two
    digits; the hours field grows as needed.

    Args:
        hours (int): number of hours;
        minutes (int): number of minutes;
        seconds (int): number of seconds.

    Returns:
        The duration as string in the format ``HH:MM:SS``.

    Raises:
        ValueError: if the total duration is negative.

    """
    # Working on the total number of seconds keeps the minutes and seconds
    # fields within 0-59; fractional inputs are rounded to whole seconds.
    total_seconds = int(round(hours * 3600 + minutes * 60 + seconds))
    if total_seconds < 0:
        raise ValueError('The duration must not be negative.')

    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)

    return '{:02d}:{:02d}:{:02d}'.format(h, m, s)