1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 """
36 WorkerPdsh
37
38 ClusterShell worker for executing commands with LLNL pdsh.
39 """
40
41 import errno
42 import os
43 import signal
44 import sys
45
46 from ClusterShell.NodeSet import NodeSet
47 from ClusterShell.Worker.EngineClient import EngineClient
48 from ClusterShell.Worker.EngineClient import EngineClientError
49 from ClusterShell.Worker.EngineClient import EngineClientNotSupportedError
50 from ClusterShell.Worker.Worker import DistantWorker
51 from ClusterShell.Worker.Worker import WorkerError, WorkerBadArgumentError
52
53
55 """
56 ClusterShell pdsh-based worker Class.
57
58 Remote Shell (pdsh) usage example:
59 worker = WorkerPdsh(nodeset, handler=MyEventHandler(),
60 timeout=30, command="/bin/hostname")
61 Remote Copy (pdcp) usage example:
62 worker = WorkerPdsh(nodeset, handler=MyEventHandler(),
63 timeout=30, source="/etc/my.conf",
64 dest="/etc/my.conf")
65 ...
66 task.schedule(worker) # schedule worker for execution
67 ...
68 task.resume() # run
69
70 Known Limitations:
71 * write() is not supported by WorkerPdsh
72 * return codes == 0 are not guaranteed when a timeout is used (rc > 0
73 are fine)
74 """
75
def __init__(self, nodes, handler, timeout, **kwargs):
    """
    Set up a pdsh worker, either in remote shell or in remote copy mode.

    nodes is converted to a NodeSet, handler is handed to the
    DistantWorker initializer and timeout to the EngineClient one.

    Recognized keyword arguments:
      command   -- command to run; when not None, selects 'pdsh' mode
      source    -- path to copy; selects 'pdcp' mode when no command
                   is given
      dest      -- copy destination, kept in 'pdcp' mode only
      preserve  -- read in 'pdcp' mode only (default False)
      stderr    -- forwarded to EngineClient (default False)
      autoclose -- forwarded to EngineClient (default False)

    Raises WorkerBadArgumentError when neither a command nor a source
    is provided.
    """
    DistantWorker.__init__(self, handler)

    self.nodes = NodeSet(nodes)
    self.closed_nodes = NodeSet()

    self.command = kwargs.get('command')
    self.source = kwargs.get('source')
    self.dest = kwargs.get('dest')

    # the worker acts as its own engine client
    EngineClient.__init__(self, self, kwargs.get('stderr', False), timeout,
                          kwargs.get('autoclose', False))

    remote_shell = self.command is not None
    if not remote_shell and not self.source:
        raise WorkerBadArgumentError("missing command or source in "
                                     "WorkerPdsh constructor")

    if remote_shell:
        # a command takes precedence over any copy argument
        self.source = None
        self.dest = None
        self.mode = 'pdsh'
    else:
        self.command = None
        self.mode = 'pdcp'
        self.isdir = os.path.isdir(self.source)
        self.preserve = kwargs.get('preserve', False)

    self.popen = None
    self._buf = ""
111
114
116 """
117 Start worker, initialize buffers, prepare command.
118 """
119
120 self._buf = ""
121
122 pdsh_env = {}
123
124 if self.command is not None:
125
126 executable = self.task.info("pdsh_path") or "pdsh"
127 cmd_l = [ executable, "-b" ]
128
129 fanout = self.task.info("fanout", 0)
130 if fanout > 0:
131 cmd_l.append("-f %d" % fanout)
132
133
134
135
136 connect_timeout = self.task.info("connect_timeout", 0)
137 if connect_timeout > 0:
138 pdsh_env['PDSH_SSH_ARGS_APPEND'] = "-o ConnectTimeout=%d" % \
139 connect_timeout
140
141 command_timeout = self.task.info("command_timeout", 0)
142 if command_timeout > 0:
143 cmd_l.append("-u %d" % command_timeout)
144
145 cmd_l.append("-w %s" % self.nodes)
146 cmd_l.append("%s" % self.command)
147
148 if self.task.info("debug", False):
149 self.task.info("print_debug")(self.task, "PDSH: %s" % \
150 ' '.join(cmd_l))
151 else:
152
153 executable = self.task.info("pdcp_path") or "pdcp"
154 cmd_l = [ executable, "-b" ]
155
156 fanout = self.task.info("fanout", 0)
157 if fanout > 0:
158 cmd_l.append("-f %d" % fanout)
159
160 connect_timeout = self.task.info("connect_timeout", 0)
161 if connect_timeout > 0:
162 cmd_l.append("-t %d" % connect_timeout)
163
164 cmd_l.append("-w %s" % self.nodes)
165
166 if self.isdir:
167 cmd_l.append("-r")
168
169 if self.preserve:
170 cmd_l.append("-p")
171
172 cmd_l.append(self.source)
173 cmd_l.append(self.dest)
174
175 if self.task.info("debug", False):
176 self.task.info("print_debug")(self.task,"PDCP: %s" % \
177 ' '.join(cmd_l))
178
179 self.popen = self._exec_nonblock(cmd_l, env=pdsh_env)
180 self.file_error = self.popen.stderr
181 self.file_reader = self.popen.stdout
182 self.file_writer = self.popen.stdin
183
184 self._on_start()
185
186 return self
187
188 - def _read(self, size=-1):
189 """
190 Read data from process.
191 """
192 result = self.file_reader.read(size)
193 if result > 0:
194 self._set_reading()
195 return result
196
198 """
199 Read error from process.
200 """
201 result = self.file_error.read(size)
202 if result > 0:
203 self._set_reading_error()
204 return result
205
207 """
208 Write data to process. Not supported with Pdsh worker.
209 """
210 raise EngineClientNotSupportedError("writing is not " \
211 "supported by pdsh worker")
212
213 - def _close(self, force, timeout):
214 """
215 Close worker. Called by engine after worker has been
216 unregistered. This method should handle all termination types
217 (normal, forced or on timeout).
218 """
219 if force or timeout:
220 prc = self.popen.poll()
221 if prc is None:
222
223 os.kill(self.popen.pid, signal.SIGKILL)
224 if timeout:
225 self._invoke("ev_timeout")
226 else:
227 prc = self.popen.wait()
228 if prc >= 0:
229 rc = prc
230 if rc != 0:
231 raise WorkerError("Cannot run pdsh (error %d)" % rc)
232
233
234 self.popen.stdin.close()
235 self.popen.stdout.close()
236
237 if timeout:
238 for node in (self.nodes - self.closed_nodes):
239 self._on_node_timeout(node)
240 else:
241 for node in (self.nodes - self.closed_nodes):
242 self._on_node_rc(node, 0)
243
244 self._invoke("ev_close")
245
247 """
248 Parse Pdsh line syntax.
249 """
250 if line.startswith("pdsh@") or \
251 line.startswith("pdcp@") or \
252 line.startswith("sending "):
253 try:
254
255
256
257
258
259
260
261
262
263
264
265
266
267 words = line.split()
268
269 if self.mode == 'pdsh':
270 if len(words) == 4 and words[2] == "command" and \
271 words[3] == "timeout":
272 pass
273 elif len(words) == 8 and words[3] == "exited" and \
274 words[7].isdigit():
275 self._on_node_rc(words[1][:-1], int(words[7]))
276 elif self.mode == 'pdcp':
277 self._on_node_rc(words[1][:-1], errno.ENOENT)
278
279 except Exception, e:
280 print >> sys.stderr, e
281 raise EngineClientError()
282 else:
283
284 nodename, msg = line.split(': ', 1)
285 if stderr:
286 self._on_node_errline(nodename, msg)
287 else:
288 self._on_node_msgline(nodename, msg)
289
291 """
292 Engine is telling us a read is available.
293 """
294 debug = self.task.info("debug", False)
295 if debug:
296 print_debug = self.task.info("print_debug")
297
298 for msg in self._readlines():
299 if debug:
300 print_debug(self.task, "PDSH: %s" % msg)
301 self._parse_line(msg, False)
302
304 """
305 Engine is telling us an error read is available.
306 """
307 debug = self.worker.task.info("debug", False)
308 if debug:
309 print_debug = self.worker.task.info("print_debug")
310
311 for msg in self._readerrlines():
312 if debug:
313 print_debug(self.task, "PDSH@STDERR: %s" % msg)
314 self._parse_line(msg, True)
315
317 """
318 Return code received from a node, update last* stuffs.
319 """
320 DistantWorker._on_node_rc(self, node, rc)
321 self.closed_nodes.add(node)
322