#-- Copy the prepared run directory from the archive to scratch ---------------
# NOTE(review): this file reads like a command cheat sheet (it contains pasted
# terminal output and several alternative settings). It is presumably meant to
# be pasted section by section into an interactive shell rather than executed
# top to bottom -- confirm before running it as a script.

#-- When NOT working on Brutus
export ARCHIV=/net/astrogate/export/astro1data/horene
export SCRATCH=horene@brutus.ethz.ch:/cluster/scratch_xp/shareholder/astro/horene

#-- When working on Brutus (overrides the two assignments above)
export ARCHIV=horene@astrogate.ethz.ch:/export/astro1data/horene
export SCRATCH=/cluster/scratch_xp/shareholder/astro/horene

# Name of the run directory. These are alternatives; when executed in order,
# the last assignment wins.
export DIRNAME=sci_testLTE
export DIRNAME=sci_test
export DIRNAME=sci_testFe23k
export DIRNAME=mhd704000
export DIRNAME=test1small
export DIRNAME=test1Dparallel
export DIRNAME=mhdLTE1D210000
export DIRNAME=mhdNLTE1D210000

export RTEDIR="$SCRATCH/$DIRNAME/rh"
# Export RTE1DDIR itself so child processes can see it (RTEDIR is already
# exported on the line above).
export RTE1DDIR="$SCRATCH/$DIRNAME/job/rh"
echo "$RTEDIR"
export RUN=run
# RAYDIR is only set in the current shell, it is not exported.
RAYDIR="$RTEDIR/../../../rayinput"
export OS=Linux
export CPU=x86_64

scp -r "$ARCHIV/brutus/preps/$DIRNAME" "$SCRATCH"
scp -r "$ARCHIV/brutus/preps/$DIRNAME/job" "$SCRATCH/$DIRNAME"
#scp -r $ARCHIV/brutus/preps/vorlage/rayinput $SCRATCH/$DIRNAME/
# scp -r $ARCHIV/brutus/preps/vorlage/rh/rhsc3d/run/*solv*.sh /cluster/scratch_xp/shareholder/astro/horene/sci_test/rh/rhsc3d/run

#-- make -----------------
# Every cleanup/build is chained to its cd with && so that a failed cd can
# never cause rm to delete files in whatever directory the shell is in.
cd "$RTEDIR" && { rm -- ./*.a; make; }
cd "$RTEDIR" && cd rhsc3d && { rm -- ./*.o; make; }
#-------------------
cd "$RTE1DDIR" && { rm -- ./*.a; make; }
cd "$RTE1DDIR" && cd rhf1d && { rm -- ./*.o; make; rm -- ./*.o; }

# Submit the 1D jobs from the directory above $RTE1DDIR.
cd "$RTE1DDIR" && cd .. && ./sub_job.sh
# Output recorded from a previous submission:
#   Generic job. Job <66166125> is submitted to queue .
#   Generic job. Job <66166126> is submitted to queue .
#   Generic job. Job <66166127> is submitted to queue .
#   Generic job. Job <66166128> is submitted to queue .
#-- Monitoring the submitted 1D jobs -------------------------------------------
# Recorded session:
#   [horene@brutus2 job]$ bjobs
#   JOBID    USER   STAT QUEUE  FROM_HOST EXEC_HOST JOB_NAME  SUBMIT_TIME
#   66166125 horene RUN  pub.8h brutus2   a6044     *d_job.sh Aug 27 11:30
#   66166126 horene RUN  pub.8h brutus2   a6084     *d_job.sh Aug 27 11:30
#   66166127 horene RUN  pub.8h brutus2   a6084     *d_job.sh Aug 27 11:30
#   66166128 horene RUN  pub.8h brutus2   a6068     *d_job.sh Aug 27 11:30
bpeek 66166125 | grep ERROR
bpeek 66166125 | grep WARNING
bpeek 66166125 | grep MHD
bbjobs 66166125
# ->> half an hour for roughly 1/5 of the atmospheres of one row of 1024,
#     practically no CPU!!
#
# Recorded LSF job report:
#   ------------------------------------------------------------
#   # LSBATCH: User input
#   ./runintemp.sh ./run1d_job.sh
#   ------------------------------------------------------------
#   Successfully completed.
#   Resource usage summary:
#     CPU time      : 6818.57 sec.
#     Max Memory    : 32 MB
#     Max Swap      : 2692 MB
#     Max Processes : 8
#     Max Threads   : 42
#   The output (if any) follows:
#   /scratch/66166126.tmpdir/8366

#-------------------
# Fetch atmosphere files from the sci_test run and create the run directory
# from the runcopy template.
cd "$RTEDIR/Atmos" && cp ../../../sci_test/rh/Atmos/mhd_7*288* .
cd "$RTEDIR/rhsc3d" && cp -r runcopy "$RUN"

#- Replace text in files
# sed writes to a temporary file first; the original is only replaced when sed
# succeeded, so a failing sed cannot clobber the keyword file.
cd "$RTEDIR/rhsc3d/$RUN" &&
  for f in keyword.input.nob keyword.input.b; do
    sed 's/_x0=0_y0=/_x0=1056_y0=/g' "$f" > keyword.input.sed &&
      mv keyword.input.sed "$f"
  done

#-------------------
lfs quota -u horene /cluster/scratch_xl
lfs quota -u horene /cluster/scratch_xp

# Main job. The four submissions differ only in the -W wall-clock limit;
# presumably they are alternatives -- pick one.
cd "$RTEDIR/rhsc3d/$RUN"
bsub -o job.%J.log -W 540:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.sh
bsub -o job.%J.log -W 360:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.sh
bsub -o job.%J.log -W 168:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.sh
bsub -o job.%J.log -W 36:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.sh

#-- LTE
cd "$RTEDIR/rhsc3d/$RUN"
bsub -o job.%J.log -W 72:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.lte.sh
bsub -o job.%J.log -W 72:00 -N -n 32 -R "rusage[mem=10000]" -w "ended(52789198)" < runmainjob.lte.sh

bsub -o solveray.%J.log -W 36:00 -N -n 32 -R "rusage[mem=20000]" < runsolveray.sh
bsub -o solveray.%J.log -W 168:00 -N -n 32 -R "rusage[mem=20000]" < runsolveray.sh

#---- Chained ---------
bsub -J job1 -o job.%J.log -W 360:00 -N -n 32 -R "rusage[mem=10000]" < runmainjob.sh
bsub -w "done(job1)" -o solveray.%J.log -W 36:00 -N -n 32 -R "rusage[mem=20000]" < runsolveray.sh
bsub -o remove.%J.log -W 1:00 -w "ended(52789198)" < remove_files.sh
# 52789198
# Possibly several solveray jobs as well, if in different subdirectories ...
# Copying of J.dat, pops etc.? -> make sure everything fits into one solveray!
# Queue wall time > 36 if run in a loop!!!

#---- Many rays in parallel ---------
# CAUTION: adjust the time if several run in a loop, as currently, per
# solveray? Approx. 10 h per solveray (see test Fux).
cd "$RTEDIR/rhsc3d"
for t in 1 2 3 4 5; do
  cp -r "$RTEDIR/rhsc3d/$RUN" "$RTEDIR/rhsc3d/run$t"
  cd "$RTEDIR/rhsc3d/run$t" &&
    bsub -o solveray.%J.log -W 36:00 -N -n 32 -R "rusage[mem=20000]" < "runsolveray$t.sh"
done

# Collect the spectra of the parallel runs.
# NOTE(review): relative to $RTEDIR/rhsc3d, ../$RUN is $RTEDIR/$RUN and not
# the run directory $RTEDIR/rhsc3d/$RUN used everywhere else -- confirm that
# this target is intended.
cd "$RTEDIR/rhsc3d" &&
  for t in 1 2 3 4 5; do
    mv "run$t"/spectrum_* "../$RUN"
  done

# Remove the per-ray copies; only done after a successful cd.
cd "$RTEDIR/rhsc3d" &&
  for t in 1 2 3 4 5; do
    rm -r -- "run$t"
  done

#----------
bbjobs
bqueues -l special
bjobs -h
bjobs -q special -u all
bjobs -q special -u all -l -p
bjobs
# Recorded output:
#   JOBID    USER   STAT QUEUE   FROM_HOST EXEC_HOST JOB_NAME   SUBMIT_TIME
#   52289433 horene RUN  special brutus1   32*a1004  *../rhsc3d Mar 27 10:05
lsload a1004
# Recorded output:
#   HOST_NAME status r15s r1m r15m ut pg  ls it   tmp   swp   mem
#   a1004     ok     1.0  1.0 1.2  3% 0.0 0  4592 5616M 4094M 953G
lsload | grep a1
# a1* are the fat nodes
# Check output:
bpeek 52289433

#-------------------
# Copy back
#-------------------
export ARCHIV=horene@astrogate.ethz.ch:/export/astro1data/horene
export SCRATCH=/cluster/scratch_xp/shareholder/astro/horene
# Alternatives; when executed in order, the last assignment wins.
export DIRNAME=rh
export DIRNAME=sci_testFe23k
export DIRNAME=test1Dparallel
export DIRNAME=mhdLTE1D210000
#-- without the input directory
rsync -av --exclude input "$SCRATCH/$DIRNAME" horene@astrogate.ethz.ch:/export/astro1data/horene/brutus/results
#-- everything
rsync -av "$SCRATCH/$DIRNAME" horene@astrogate.ethz.ch:/export/astro1data/horene/brutus/results

#-------------------------------------------
#---- LTE and NLTE -> that would be possible with different keyword.input
#     files! solveray would be identical! Copying the results would still
#     have to be done separately!
#
# bsub -- submit a job to the batch system.
# http://www.brutuswiki.ethz.ch/brutus/LSF_mini_reference
#
#   -W HH:MM            Wall-clock time required by the job. Can also be
#                       expressed in minutes.
#   -n N                Number of processors required by the job.
#   -R "rusage[mem=X]"  Amount of memory (in MB per processor) required by
#                       the job.
#   -o outfile          Append the job's output (stdout) to outfile. The
#                       keyword "%J" is interpreted as the job's numerical ID.
#   -e errfile          Append the job's error (stderr) to errfile. By
#                       default, stderr is merged with stdout.
#   -oo outfile         Write the job's output (stdout) to outfile,
#                       overwriting it if it already exists.
#   -eo errfile         Write the job's error (stderr) to errfile,
#                       overwriting it if it already exists.
#   -I / -Ip / -Is      Run the job interactively. Input/output are
#                       redirected from/to your terminal. Use -Ip to create a
#                       pseudo-terminal, and -Is to enable shell support.
#   -J jobname          Assign a (not necessarily unique) name to the job.
#                       Used to define job chains. To avoid confusion with
#                       numerical job IDs, jobname should contain at least
#                       one letter.
#   -w "depcond"        Wait (do not start the job) until the specified
#                       dependency condition is satisfied. For example:
#                       "done(jobID)", "ended(jobname)". Quotes are
#                       recommended.
#   -B / -N             Send an e-mail to the job's owner
#                       (username@brutus.ethz.ch) when the job begins / ends.
#   -u user             Send e-mail to user instead of the job's owner. The
#                       recipient's address must be inside the ETH domain.
#                       The firewall blocks e-mail sent to other addresses.
#   -r                  Indicate that the job is re-runnable. If the compute
#                       node where your job is running crashes, LSF will
#                       automatically re-run it from the beginning on a
#                       different node.