
Source Code for Module ClusterShell.Worker.Pdsh

#
# Copyright CEA/DAM/DIF (2007, 2008, 2009, 2010)
#  Contributor: Stephane THIELL <stephane.thiell@cea.fr>
#
# This file is part of the ClusterShell library.
#
# This software is governed by the CeCILL-C license under French law and
# abiding by the rules of distribution of free software.  You can  use,
# modify and/ or redistribute the software under the terms of the CeCILL-C
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and,  more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL-C license and that you accept its terms.
#
# $Id: Pdsh.py 238 2010-02-25 22:30:31Z st-cea $

"""
WorkerPdsh

ClusterShell worker for executing commands with LLNL pdsh.
"""

import errno
import os
import signal
import sys

from ClusterShell.NodeSet import NodeSet
from ClusterShell.Worker.EngineClient import EngineClient
from ClusterShell.Worker.EngineClient import EngineClientError
from ClusterShell.Worker.EngineClient import EngineClientNotSupportedError
from ClusterShell.Worker.Worker import DistantWorker
from ClusterShell.Worker.Worker import WorkerError, WorkerBadArgumentError

class WorkerPdsh(EngineClient, DistantWorker):
    """
    ClusterShell pdsh-based worker Class.

    Remote Shell (pdsh) usage example:
        worker = WorkerPdsh(nodeset, handler=MyEventHandler(),
                            timeout=30, command="/bin/hostname")

    Remote Copy (pdcp) usage example:
        worker = WorkerPdsh(nodeset, handler=MyEventHandler(),
                            timeout=30, source="/etc/my.conf",
                            dest="/etc/my.conf")
        ...
        task.schedule(worker)   # schedule worker for execution
        ...
        task.resume()           # run

    Known Limitations:
      * write() is not supported by WorkerPdsh
      * return codes == 0 are not guaranteed when a timeout is used
        (rc > 0 are fine)
    """

    def __init__(self, nodes, handler, timeout, **kwargs):
        """
        Initialize Pdsh worker instance.
        """
        DistantWorker.__init__(self, handler)

        self.nodes = NodeSet(nodes)
        self.closed_nodes = NodeSet()

        self.command = kwargs.get('command')
        self.source = kwargs.get('source')
        self.dest = kwargs.get('dest')

        autoclose = kwargs.get('autoclose', False)
        stderr = kwargs.get('stderr', False)

        EngineClient.__init__(self, self, stderr, timeout, autoclose)

        if self.command is not None:
            # PDSH
            self.source = None
            self.dest = None
            self.mode = 'pdsh'
        elif self.source:
            # PDCP
            self.command = None
            self.mode = 'pdcp'
            self.isdir = os.path.isdir(self.source)
            # Preserve modification times and modes?
            self.preserve = kwargs.get('preserve', False)
        else:
            raise WorkerBadArgumentError("missing command or source in " \
                                         "WorkerPdsh constructor")
        self.popen = None
        self._buf = ""

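    # Editor's note -- illustrative sketch, not part of the original module.
    # As the class docstring and __init__ above describe, the keyword
    # arguments select the mode:
    #
    #   # remote command -> 'pdsh' mode
    #   WorkerPdsh(nodes, handler=None, timeout=30, command="uname -r")
    #
    #   # remote copy -> 'pdcp' mode (optionally preserve=True)
    #   WorkerPdsh(nodes, handler=None, timeout=30,
    #              source="/etc/hosts", dest="/etc/hosts")
    #
    # Passing neither 'command' nor 'source' raises WorkerBadArgumentError.
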
    def _engine_clients(self):
        return [self]

    def _start(self):
        """
        Start worker, initialize buffers, prepare command.
        """
        # Initialize worker read buffer
        self._buf = ""

        pdsh_env = {}

        if self.command is not None:
            # Build pdsh command
            executable = self.task.info("pdsh_path") or "pdsh"
            cmd_l = [ executable, "-b" ]

            fanout = self.task.info("fanout", 0)
            if fanout > 0:
                cmd_l.append("-f %d" % fanout)

            # Pdsh's '-t' flag does not really work well. It is better to
            # use the PDSH_SSH_ARGS_APPEND variable to pass the ssh
            # ConnectTimeout option.
            connect_timeout = self.task.info("connect_timeout", 0)
            if connect_timeout > 0:
                pdsh_env['PDSH_SSH_ARGS_APPEND'] = "-o ConnectTimeout=%d" % \
                                                   connect_timeout

            command_timeout = self.task.info("command_timeout", 0)
            if command_timeout > 0:
                cmd_l.append("-u %d" % command_timeout)

            cmd_l.append("-w %s" % self.nodes)
            cmd_l.append("%s" % self.command)

            if self.task.info("debug", False):
                self.task.info("print_debug")(self.task, "PDSH: %s" % \
                                              ' '.join(cmd_l))
        else:
            # Build pdcp command
            executable = self.task.info("pdcp_path") or "pdcp"
            cmd_l = [ executable, "-b" ]

            fanout = self.task.info("fanout", 0)
            if fanout > 0:
                cmd_l.append("-f %d" % fanout)

            connect_timeout = self.task.info("connect_timeout", 0)
            if connect_timeout > 0:
                cmd_l.append("-t %d" % connect_timeout)

            cmd_l.append("-w %s" % self.nodes)

            if self.isdir:
                cmd_l.append("-r")

            if self.preserve:
                cmd_l.append("-p")

            cmd_l.append(self.source)
            cmd_l.append(self.dest)

            if self.task.info("debug", False):
                self.task.info("print_debug")(self.task, "PDCP: %s" % \
                                              ' '.join(cmd_l))

        self.popen = self._exec_nonblock(cmd_l, env=pdsh_env)
        self.file_error = self.popen.stderr
        self.file_reader = self.popen.stdout
        self.file_writer = self.popen.stdin

        self._on_start()

        return self

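    # Editor's note -- illustrative sketch, not part of the original module.
    # With hypothetical task settings fanout=32, connect_timeout=10 and
    # command_timeout=0, and a command "uname -r" on nodes "node[1-32]",
    # _start() above builds a pdsh invocation roughly equivalent to the
    # shell command:
    #
    #   PDSH_SSH_ARGS_APPEND="-o ConnectTimeout=10" \
    #       pdsh -b -f 32 -w node[1-32] uname -r
    #
    # (connect_timeout is passed through ssh's ConnectTimeout option rather
    # than pdsh's own '-t' flag, as the comment above explains.)
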
    def _read(self, size=-1):
        """
        Read data from process.
        """
        result = self.file_reader.read(size)
        if result:
            self._set_reading()
        return result

    def _readerr(self, size=-1):
        """
        Read error from process.
        """
        result = self.file_error.read(size)
        if result:
            self._set_reading_error()
        return result

    def write(self, buf):
        """
        Write data to process. Not supported with Pdsh worker.
        """
        raise EngineClientNotSupportedError("writing is not " \
                                            "supported by pdsh worker")

    def _close(self, force, timeout):
        """
        Close worker. Called by engine after worker has been
        unregistered. This method should handle all termination types
        (normal, forced or on timeout).
        """
        if force or timeout:
            prc = self.popen.poll()
            if prc is None:
                # process is still running, kill it
                os.kill(self.popen.pid, signal.SIGKILL)
            if timeout:
                self._invoke("ev_timeout")
        else:
            prc = self.popen.wait()
            if prc >= 0:
                rc = prc
                if rc != 0:
                    raise WorkerError("Cannot run pdsh (error %d)" % rc)

        # close
        self.popen.stdin.close()
        self.popen.stdout.close()

        if timeout:
            for node in (self.nodes - self.closed_nodes):
                self._on_node_timeout(node)
        else:
            for node in (self.nodes - self.closed_nodes):
                self._on_node_rc(node, 0)

        self._invoke("ev_close")

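    # Editor's note -- illustrative sketch, not part of the original module.
    # On timeout, _close() above kills pdsh, invokes ev_timeout once on the
    # handler and reports every node that never returned a code through
    # _on_node_timeout(); this is why rc == 0 cannot be trusted under
    # timeout (see the class docstring).  A hypothetical handler could react
    # like this, assuming the Task API's num_timeout():
    #
    #   class MyHandler(EventHandler):
    #       def ev_timeout(self, worker):
    #           print "%d node(s) timed out" % worker.task.num_timeout()
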
    def _parse_line(self, line, stderr):
        """
        Parse Pdsh line syntax.
        """
        if line.startswith("pdsh@") or \
           line.startswith("pdcp@") or \
           line.startswith("sending "):
            try:
                # pdsh@cors113: cors115: ssh exited with exit code 1
                #      0           1      2    3     4    5    6   7
                # corsUNKN: ssh: corsUNKN: Name or service not known
                #     0      1       2       3   4     5     6    7
                # pdsh@fortoy0: fortoy101: command timeout
                #      0            1         2       3
                # sending SIGTERM to ssh fortoy112 pid 32014
                #     0       1    2   3      4     5    6
                # pdcp@cors113: corsUNKN: ssh exited with exit code 255
                #      0            1      2    3     4    5    6   7
                # pdcp@cors113: cors115: fatal: /var/cache/shine/...
                #      0           1       2            3...

                words = line.split()
                # Set return code for nodename of worker
                if self.mode == 'pdsh':
                    if len(words) == 4 and words[2] == "command" and \
                       words[3] == "timeout":
                        pass
                    elif len(words) == 8 and words[3] == "exited" and \
                         words[7].isdigit():
                        self._on_node_rc(words[1][:-1], int(words[7]))
                elif self.mode == 'pdcp':
                    self._on_node_rc(words[1][:-1], errno.ENOENT)

            except Exception, e:
                print >> sys.stderr, e
                raise EngineClientError()
        else:
            # split pdsh reply "nodename: msg"
            nodename, msg = line.split(': ', 1)
            if stderr:
                self._on_node_errline(nodename, msg)
            else:
                self._on_node_msgline(nodename, msg)

    def _handle_read(self):
        """
        Engine is telling us a read is available.
        """
        debug = self.task.info("debug", False)
        if debug:
            print_debug = self.task.info("print_debug")

        for msg in self._readlines():
            if debug:
                print_debug(self.task, "PDSH: %s" % msg)
            self._parse_line(msg, False)

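    # Editor's note -- worked example of the parsing above (illustration
    # only).  Given the stdout line:
    #
    #   pdsh@cors113: cors115: ssh exited with exit code 1
    #
    # line.split() yields 8 words with words[3] == "exited" and
    # words[7] == "1", so _on_node_rc("cors115", 1) is called (the trailing
    # ':' is stripped from words[1]).  A plain output line such as
    # "cors115: Linux" takes the else branch and is forwarded via
    # _on_node_msgline("cors115", "Linux").
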
    def _handle_error(self):
        """
        Engine is telling us an error read is available.
        """
        debug = self.worker.task.info("debug", False)
        if debug:
            print_debug = self.worker.task.info("print_debug")

        for msg in self._readerrlines():
            if debug:
                print_debug(self.task, "PDSH@STDERR: %s" % msg)
            self._parse_line(msg, True)

    def _on_node_rc(self, node, rc):
        """
        Return code received from a node, update last* values.
        """
        DistantWorker._on_node_rc(self, node, rc)
        self.closed_nodes.add(node)
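
Usage Example

The sketch below shows one way to drive WorkerPdsh from a Task, following the
class docstring above. It is an editor's illustration, not part of the module:
the handler class name and node set are placeholders, and it assumes the
standard ClusterShell Task API (task_self, Task.schedule, Task.resume), the
DistantWorker accessor last_read() returning a (node, line) tuple, and an
installed pdsh binary.

    from ClusterShell.Task import task_self
    from ClusterShell.Event import EventHandler
    from ClusterShell.Worker.Pdsh import WorkerPdsh

    class OutputHandler(EventHandler):
        def ev_read(self, worker):
            # called for each complete line read from a node
            node, line = worker.last_read()
            print "%s: %s" % (node, line)

    task = task_self()
    worker = WorkerPdsh("node[1-4]", handler=OutputHandler(), timeout=30,
                        command="/bin/hostname")
    task.schedule(worker)   # schedule worker for execution
    task.resume()           # run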