proc.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
  2. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
  3. #
  4. # This file is part of logilab-common.
  5. #
  6. # logilab-common is free software: you can redistribute it and/or modify it under
  7. # the terms of the GNU Lesser General Public License as published by the Free
  8. # Software Foundation, either version 2.1 of the License, or (at your option) any
  9. # later version.
  10. #
  11. # logilab-common is distributed in the hope that it will be useful, but WITHOUT
  12. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13. # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  14. # details.
  15. #
  16. # You should have received a copy of the GNU Lesser General Public License along
  17. # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
  18. """module providing:
  19. * process information (linux specific: rely on /proc)
  20. * a class for resource control (memory / time / cpu time)
  21. This module doesn't work on windows platforms (only tested on linux)
  22. :organization: Logilab
  23. """
  24. __docformat__ = "restructuredtext en"
  25. import os
  26. import stat
  27. from resource import getrlimit, setrlimit, RLIMIT_CPU, RLIMIT_AS
  28. from signal import signal, SIGXCPU, SIGKILL, SIGUSR2, SIGUSR1
  29. from threading import Timer, currentThread, Thread, Event
  30. from time import time
  31. from logilab.common.tree import Node
  32. class NoSuchProcess(Exception): pass
  33. def proc_exists(pid):
  34. """check the a pid is registered in /proc
  35. raise NoSuchProcess exception if not
  36. """
  37. if not os.path.exists('/proc/%s' % pid):
  38. raise NoSuchProcess()
# 0-based positions, in the whitespace-split content of /proc/<pid>/stat, of
# the fields read by ProcInfo (see proc(5) for the complete field list)
PPID = 3     # pid of the parent process
UTIME = 13   # jiffies scheduled in user mode
STIME = 14   # jiffies scheduled in kernel mode
CUTIME = 15  # user mode jiffies of waited-for children
CSTIME = 16  # kernel mode jiffies of waited-for children
VSIZE = 22   # virtual memory size
  45. class ProcInfo(Node):
  46. """provide access to process information found in /proc"""
  47. def __init__(self, pid):
  48. self.pid = int(pid)
  49. Node.__init__(self, self.pid)
  50. proc_exists(self.pid)
  51. self.file = '/proc/%s/stat' % self.pid
  52. self.ppid = int(self.status()[PPID])
  53. def memory_usage(self):
  54. """return the memory usage of the process in Ko"""
  55. try :
  56. return int(self.status()[VSIZE])
  57. except IOError:
  58. return 0
  59. def lineage_memory_usage(self):
  60. return self.memory_usage() + sum([child.lineage_memory_usage()
  61. for child in self.children])
  62. def time(self, children=0):
  63. """return the number of jiffies that this process has been scheduled
  64. in user and kernel mode"""
  65. status = self.status()
  66. time = int(status[UTIME]) + int(status[STIME])
  67. if children:
  68. time += int(status[CUTIME]) + int(status[CSTIME])
  69. return time
  70. def status(self):
  71. """return the list of fields found in /proc/<pid>/stat"""
  72. return open(self.file).read().split()
  73. def name(self):
  74. """return the process name found in /proc/<pid>/stat
  75. """
  76. return self.status()[1].strip('()')
  77. def age(self):
  78. """return the age of the process
  79. """
  80. return os.stat(self.file)[stat.ST_MTIME]
  81. class ProcInfoLoader:
  82. """manage process information"""
  83. def __init__(self):
  84. self._loaded = {}
  85. def list_pids(self):
  86. """return a list of existent process ids"""
  87. for subdir in os.listdir('/proc'):
  88. if subdir.isdigit():
  89. yield int(subdir)
  90. def load(self, pid):
  91. """get a ProcInfo object for a given pid"""
  92. pid = int(pid)
  93. try:
  94. return self._loaded[pid]
  95. except KeyError:
  96. procinfo = ProcInfo(pid)
  97. procinfo.manager = self
  98. self._loaded[pid] = procinfo
  99. return procinfo
  100. def load_all(self):
  101. """load all processes information"""
  102. for pid in self.list_pids():
  103. try:
  104. procinfo = self.load(pid)
  105. if procinfo.parent is None and procinfo.ppid:
  106. pprocinfo = self.load(procinfo.ppid)
  107. pprocinfo.append(procinfo)
  108. except NoSuchProcess:
  109. pass
  110. try:
  111. class ResourceError(BaseException):
  112. """Error raise when resource limit is reached"""
  113. limit = "Unknown Resource Limit"
  114. except NameError:
  115. class ResourceError(Exception):
  116. """Error raise when resource limit is reached"""
  117. limit = "Unknown Resource Limit"
  118. class XCPUError(ResourceError):
  119. """Error raised when CPU Time limit is reached"""
  120. limit = "CPU Time"
  121. class LineageMemoryError(ResourceError):
  122. """Error raised when the total amount of memory used by a process and
  123. it's child is reached"""
  124. limit = "Lineage total Memory"
  125. class TimeoutError(ResourceError):
  126. """Error raised when the process is running for to much time"""
  127. limit = "Real Time"
# The builtin MemoryError may also be raised when a limit is hit, and being a
# builtin it can't be turned into a ResourceError subclass: both are grouped
# in a tuple (presumably to be used as `except RESOURCE_LIMIT_EXCEPTION:`)
RESOURCE_LIMIT_EXCEPTION = (ResourceError, MemoryError)
  130. class MemorySentinel(Thread):
  131. """A class checking a process don't use too much memory in a separated
  132. daemonic thread
  133. """
  134. def __init__(self, interval, memory_limit, gpid=os.getpid()):
  135. Thread.__init__(self, target=self._run, name="Test.Sentinel")
  136. self.memory_limit = memory_limit
  137. self._stop = Event()
  138. self.interval = interval
  139. self.setDaemon(True)
  140. self.gpid = gpid
  141. def stop(self):
  142. """stop ap"""
  143. self._stop.set()
  144. def _run(self):
  145. pil = ProcInfoLoader()
  146. while not self._stop.isSet():
  147. if self.memory_limit <= pil.load(self.gpid).lineage_memory_usage():
  148. os.killpg(self.gpid, SIGUSR1)
  149. self._stop.wait(self.interval)
  150. class ResourceController:
  151. def __init__(self, max_cpu_time=None, max_time=None, max_memory=None,
  152. max_reprieve=60):
  153. if SIGXCPU == -1:
  154. raise RuntimeError("Unsupported platform")
  155. self.max_time = max_time
  156. self.max_memory = max_memory
  157. self.max_cpu_time = max_cpu_time
  158. self._reprieve = max_reprieve
  159. self._timer = None
  160. self._msentinel = None
  161. self._old_max_memory = None
  162. self._old_usr1_hdlr = None
  163. self._old_max_cpu_time = None
  164. self._old_usr2_hdlr = None
  165. self._old_sigxcpu_hdlr = None
  166. self._limit_set = 0
  167. self._abort_try = 0
  168. self._start_time = None
  169. self._elapse_time = 0
  170. def _hangle_sig_timeout(self, sig, frame):
  171. raise TimeoutError()
  172. def _hangle_sig_memory(self, sig, frame):
  173. if self._abort_try < self._reprieve:
  174. self._abort_try += 1
  175. raise LineageMemoryError("Memory limit reached")
  176. else:
  177. os.killpg(os.getpid(), SIGKILL)
  178. def _handle_sigxcpu(self, sig, frame):
  179. if self._abort_try < self._reprieve:
  180. self._abort_try += 1
  181. raise XCPUError("Soft CPU time limit reached")
  182. else:
  183. os.killpg(os.getpid(), SIGKILL)
  184. def _time_out(self):
  185. if self._abort_try < self._reprieve:
  186. self._abort_try += 1
  187. os.killpg(os.getpid(), SIGUSR2)
  188. if self._limit_set > 0:
  189. self._timer = Timer(1, self._time_out)
  190. self._timer.start()
  191. else:
  192. os.killpg(os.getpid(), SIGKILL)
  193. def setup_limit(self):
  194. """set up the process limit"""
  195. assert currentThread().getName() == 'MainThread'
  196. os.setpgrp()
  197. if self._limit_set <= 0:
  198. if self.max_time is not None:
  199. self._old_usr2_hdlr = signal(SIGUSR2, self._hangle_sig_timeout)
  200. self._timer = Timer(max(1, int(self.max_time) - self._elapse_time),
  201. self._time_out)
  202. self._start_time = int(time())
  203. self._timer.start()
  204. if self.max_cpu_time is not None:
  205. self._old_max_cpu_time = getrlimit(RLIMIT_CPU)
  206. cpu_limit = (int(self.max_cpu_time), self._old_max_cpu_time[1])
  207. self._old_sigxcpu_hdlr = signal(SIGXCPU, self._handle_sigxcpu)
  208. setrlimit(RLIMIT_CPU, cpu_limit)
  209. if self.max_memory is not None:
  210. self._msentinel = MemorySentinel(1, int(self.max_memory) )
  211. self._old_max_memory = getrlimit(RLIMIT_AS)
  212. self._old_usr1_hdlr = signal(SIGUSR1, self._hangle_sig_memory)
  213. as_limit = (int(self.max_memory), self._old_max_memory[1])
  214. setrlimit(RLIMIT_AS, as_limit)
  215. self._msentinel.start()
  216. self._limit_set += 1
  217. def clean_limit(self):
  218. """reinstall the old process limit"""
  219. if self._limit_set > 0:
  220. if self.max_time is not None:
  221. self._timer.cancel()
  222. self._elapse_time += int(time())-self._start_time
  223. self._timer = None
  224. signal(SIGUSR2, self._old_usr2_hdlr)
  225. if self.max_cpu_time is not None:
  226. setrlimit(RLIMIT_CPU, self._old_max_cpu_time)
  227. signal(SIGXCPU, self._old_sigxcpu_hdlr)
  228. if self.max_memory is not None:
  229. self._msentinel.stop()
  230. self._msentinel = None
  231. setrlimit(RLIMIT_AS, self._old_max_memory)
  232. signal(SIGUSR1, self._old_usr1_hdlr)
  233. self._limit_set -= 1