Personal tools
Parametric jobs port
This is a slightly modified version of the basic port explained in the previous document, adapted to exploit the "parametric" job type available with the new WMS.
Parametric version of the port
A (very) small improvement over the basic port, this slight modification of the previous files is designed to be submitted to a recent version of the WMS (like the one in euindia) using the parametric job feature.
This makes it possible to create a single JDL that spawns multiple jobs, each of which receives a different numerical index as the argument for the executable.
A text file with the list of SURLs to process is sent with the Sandbox, and each job interprets the index as the position, within that list, of the SURL it has to process.
Although this simplifies the task of submitting a simulation and checking its status, it does not simplify the only relevant chore: the re-submission of failed jobs which encountered some error during the execution.
Here is the parametric version of the JDL:
# CSCDriver_parametric.jdl
# Parametric job: this single JDL spawns several jobs, each with its own
# numerical value substituted for _PARAM_.
JobType = "Parametric";
# The index is the only argument handed to the executable.
Arguments = "_PARAM_";
Executable = "CSCDriver_parametric.py";
# Standard output and standard error are written to the same per-job file.
StdOutput = "output_PARAM_.txt";
StdError = "output_PARAM_.txt";
# The SURL list, the driver script and the VOMS file for the data.
InputSandbox = {"surl_list.txt", "CSCDriver_parametric.py", "atlas_voms_cert"};
# NOTE: the _PARAM_ keyword was found to be required in the OutputSandbox.
OutputSandbox = {"output_PARAM_.txt", "code/results.txt", "code/summary/summary_gridjob.root"};
# SURLs are numbered starting from 1.
ParameterStart = 1;
ParameterStep = 1;
# Change this value to match the number of SURLs in surl_list.txt.
Parameters = 10;
The list of SURLs is in a file called surl_list.txt and the VOMS file for the data is in the usual place. The user should change the Parameters value to match the number of SURLs in the list (the SURLs are numbered starting from 1, as in Fortran).
Keep in mind that the parametric feature in the middleware seems to be somewhat immature, therefore some errors could be encountered (we found, for example, that the _PARAM_ keyword has to be present in the OutputSandbox, although sometimes doing so serves no purpose).
Another oddity encountered with parametric jobs is that they cannot be directed to a specific Computing Element with the -r option; this can, however, be overcome with a trick. Since the Requirements directive is still honored, the effect of -r can be obtained by adding something like this:
# Restrict the job to one specific Computing Element (stands in for the -r option).
Requirements = other.GlueCEUniqueID=="ce-01.grid.sissa.it:2119/jobmanager-lcgpbs-euindia";
The Python script CSCDriver_parametric.py, which differs from the previous one only in the way it handles the arguments passed to it, is:
#!/usr/bin/env python
###
### Created by Riccardo Di Meo (International Centre for Theoretical
### Physic - EUIndiaGRID Project) and Stefano Cozzini (Democritos - SISSA)
###
### This work is licensed under the Creative Commons "2.5 Italy
### Attribution-Noncommercial" License.
###
### To view a copy of this license, visit
### "http://creativecommons.org/licenses/by-nc/2.5/it/legalcode"
###
"""Run CSCDriver on one SURL picked, by index, from a list of SURLs.

Usage: CSCDriver_parametric.py INDEX

INDEX is the 1-based line number, inside SURL_FILE, of the SURL to process.
Every failure terminates the script with one of the error codes defined
below as exit status.

The code deliberately avoids version-specific syntax (``except X, err`` /
``except X as err``, octal literals, the ``file`` builtin) so that the same
file runs on both old Python 2 interpreters and Python 3.
"""

import os
import stat
import sys
import time

# Error codes (used as exit status of the script)
DONE = 0
ARG_ERR = 1
LIST_ERR = 2
PARAM_ERR = 3
VOMS_ERROR = 4
CODE_ERROR1 = 5
CODE_ERROR2 = 6
LCG_GT_ERR1 = 7
LCG_GT_ERR2 = 8
GSIFTP_ERR = 9
CSCD_ERROR = 10
UNEXP_EXCP = 11

# Name of the file that will hold the list of SURL
SURL_FILE = "surl_list.txt"

# Second parameter for CSCDriver
BLOCKS = 100000

# Location where the tar package with the CSCDriver and library is
# stored on the euindia grid
CODE_LOCATION = "lfn:/grid/euindia/user/CSCDriver_code.tar.bz2"

# BDII for atlas, should be correctly set up before the lcg-gt step
LCG_GFAL_INFOSYS = "lcg-bdii.cern.ch:2170"

# Name of the atlas certificate: must match the jdl input sandbox
DATA_VOMS = "atlas_voms_cert"


def log(msg, error=None):
    """Print *msg* and, when *error* is given, exit with that status.

    msg   -- the text to print on standard output.
    error -- optional exit code; any value other than None (0 included)
             makes the function call sys.exit(error).
    """
    print(msg)
    # Flush so that our messages stay ordered with respect to the output
    # of the external commands started through os.system().
    sys.stdout.flush()
    if error is not None:
        sys.exit(error)


def run_CSCDisplay():
    """Fetch the program and one data file from the grid, then run CSCDriver.

    The steps are: read the SURL index from sys.argv[1], pick the matching
    line of SURL_FILE, restrict the permissions of the data certificate,
    download and unpack the program (lcg-cp + tar), switch the environment
    to the data certificate, translate the SURL into a gsiftp TURL
    (lcg-gt), download it (globus-url-copy), write samples/gridjob.def and
    finally execute ./CSCDriver.

    Nothing is returned; every failure ends in log(..., <error code>),
    which exits the process with that code.
    """
    # First and only argument: the index of the surl to work on
    try:
        surl_index = int(sys.argv[1])
    except ValueError:
        log("the index passed is not an integer!", ARG_ERR)
    except IndexError:
        log("index not passed!", ARG_ERR)

    if surl_index <= 0:
        log("index should be > 0", ARG_ERR)

    # Open the file with the list of surl and pick the right one
    try:
        surl_file = open(SURL_FILE)
    except IOError:
        log("SURL list (%s) not opened (%s)!" % (SURL_FILE, sys.exc_info()[1]),
            LIST_ERR)
    try:
        # Strip only the line terminator: blindly dropping the last
        # character would truncate a final line that lacks a newline.
        surl_list = [line.rstrip("\r\n") for line in surl_file.readlines()]
    finally:
        surl_file.close()

    try:
        surl = surl_list[surl_index - 1]
    except IndexError:
        log("The list of SURLs doesn't contain %d elements!" % surl_index,
            LIST_ERR)

    if not surl:
        log("The %dnth line in the SURLs list appear to be empty!"
            % surl_index, LIST_ERR)

    log("Processing the surl at line %d: %s" % (surl_index, surl))

    # Change the permissions to the certificate (owner read/write only)
    try:
        os.chmod(DATA_VOMS, stat.S_IRUSR | stat.S_IWUSR)
    except OSError:
        log("Cannot chmod the data certificate: %s" % sys.exc_info()[1],
            VOMS_ERROR)

    # Retrieve the program
    start = time.time()
    res = os.system("lcg-cp %s file:`pwd`/code.tar.bz2" % CODE_LOCATION)
    if res != 0:
        log("lcg-cp of the program returned %d" % res, CODE_ERROR1)

    # Unpack it
    res = os.system("tar xjf code.tar.bz2")
    if res != 0:
        log("untar of the program returned %d" % res, CODE_ERROR2)

    stop = time.time()
    log("Program set up in %d\"" % (stop - start))

    # That was the last operation with the euindia certificates:
    # change the environment.  Assigning to os.environ (rather than calling
    # os.putenv) updates the child environment and keeps os.environ in sync.
    cert_path = os.path.join(os.getcwd(), DATA_VOMS)
    os.environ["LCG_GFAL_INFOSYS"] = LCG_GFAL_INFOSYS
    os.environ["X509_USER_CERT"] = cert_path
    os.environ["X509_USER_KEY"] = cert_path

    # Enter the data directory
    os.chdir("code/Data")

    # Get the turl for the surl
    start = time.time()
    pipe = os.popen("lcg-gt %s gsiftp" % surl, "r")
    output = pipe.readlines()
    # close() returns None on success, the exit status otherwise
    status = pipe.close()
    if status is not None:
        log("Error executing lcg-gt (%d)!" % status, LCG_GT_ERR1)

    # lcg-gt may succeed without printing anything: treat it as bad output
    turl = ""
    if output:
        turl = output[0].rstrip("\r\n")
    if not turl.startswith("gsiftp"):
        log("Unrecognized string returned by lcg-gt (%s)!" % turl,
            LCG_GT_ERR2)

    stop = time.time()
    log("The turl was obtained in %d\"" % (stop - start))

    # Download the turl
    start = time.time()
    res = os.system("globus-url-copy %s file:`pwd`/data.root" % turl)
    if res != 0:
        log("Failed to download the data from the SE (%d)" % res, GSIFTP_ERR)

    os.chdir("..")

    stop = time.time()
    log("Data downloaded in %d\"" % (stop - start))

    # Create the .def file in sample
    def_file = open(os.path.join("samples", "gridjob.def"), "w")
    try:
        def_file.write("TITLE: gridjob\nData/data.root\n")
    finally:
        def_file.close()

    # Export the LD_LIBRARY_PATH to include the root libs as well
    rootpath = os.path.join(os.getcwd(), "lib")
    ldvar = os.environ.get("LD_LIBRARY_PATH")
    if ldvar:
        os.environ["LD_LIBRARY_PATH"] = ldvar + ":" + rootpath
    else:
        # Unset or empty: avoid producing a path with a leading ":"
        os.environ["LD_LIBRARY_PATH"] = rootpath

    # Run the code
    start = time.time()
    res = os.system("./CSCDriver gridjob %d" % BLOCKS)
    if res != 0:
        log("CSCDriver didn't returned 0 (%d)!" % res, CSCD_ERROR)

    stop = time.time()
    log("Program executed in %d\"" % (stop - start))


# Run the code only if called as a script
if __name__ == "__main__":
    try:
        run_CSCDisplay()
    except SystemExit:
        # Raised by log(): let it through untouched.  It must be handled
        # before the generic clause because Python releases older than 2.5
        # make SystemExit a subclass of Exception.
        raise
    except Exception:
        log("Uncaught exception (%s)!" % sys.exc_info()[1], UNEXP_EXCP)