fileutils.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
  2. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
  3. #
  4. # This file is part of logilab-common.
  5. #
  6. # logilab-common is free software: you can redistribute it and/or modify it under
  7. # the terms of the GNU Lesser General Public License as published by the Free
  8. # Software Foundation, either version 2.1 of the License, or (at your option) any
  9. # later version.
  10. #
  11. # logilab-common is distributed in the hope that it will be useful, but WITHOUT
  12. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13. # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  14. # details.
  15. #
  16. # You should have received a copy of the GNU Lesser General Public License along
  17. # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
  18. """File and file-path manipulation utilities.
  19. :group path manipulation: first_level_directory, relative_path, is_binary,\
  20. get_by_ext, remove_dead_links
  21. :group file manipulation: norm_read, norm_open, lines, stream_lines,\
  22. write_open_mode, ensure_fs_mode, export
  23. :sort: path manipulation, file manipulation
  24. """
  25. __docformat__ = "restructuredtext en"
  26. import sys
  27. import shutil
  28. import mimetypes
  29. from os.path import isabs, isdir, islink, split, exists, normpath, join
  30. from os.path import abspath
  31. from os import sep, mkdir, remove, listdir, stat, chmod, walk
  32. from stat import ST_MODE, S_IWRITE
  33. from cStringIO import StringIO
  34. from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS
  35. from logilab.common.shellutils import find
  36. from logilab.common.deprecation import deprecated
  37. from logilab.common.compat import FileIO, any
  38. def first_level_directory(path):
  39. """Return the first level directory of a path.
  40. >>> first_level_directory('home/syt/work')
  41. 'home'
  42. >>> first_level_directory('/home/syt/work')
  43. '/'
  44. >>> first_level_directory('work')
  45. 'work'
  46. >>>
  47. :type path: str
  48. :param path: the path for which we want the first level directory
  49. :rtype: str
  50. :return: the first level directory appearing in `path`
  51. """
  52. head, tail = split(path)
  53. while head and tail:
  54. head, tail = split(head)
  55. if tail:
  56. return tail
  57. # path was absolute, head is the fs root
  58. return head
  59. def abspath_listdir(path):
  60. """Lists path's content using absolute paths.
  61. >>> os.listdir('/home')
  62. ['adim', 'alf', 'arthur', 'auc']
  63. >>> abspath_listdir('/home')
  64. ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']
  65. """
  66. path = abspath(path)
  67. return [join(path, filename) for filename in listdir(path)]
  68. def is_binary(filename):
  69. """Return true if filename may be a binary file, according to it's
  70. extension.
  71. :type filename: str
  72. :param filename: the name of the file
  73. :rtype: bool
  74. :return:
  75. true if the file is a binary file (actually if it's mime type
  76. isn't beginning by text/)
  77. """
  78. try:
  79. return not mimetypes.guess_type(filename)[0].startswith('text')
  80. except AttributeError:
  81. return 1
  82. def write_open_mode(filename):
  83. """Return the write mode that should used to open file.
  84. :type filename: str
  85. :param filename: the name of the file
  86. :rtype: str
  87. :return: the mode that should be use to open the file ('w' or 'wb')
  88. """
  89. if is_binary(filename):
  90. return 'wb'
  91. return 'w'
  92. def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
  93. """Check that the given file has the given mode(s) set, else try to
  94. set it.
  95. :type filepath: str
  96. :param filepath: path of the file
  97. :type desired_mode: int
  98. :param desired_mode:
  99. ORed flags describing the desired mode. Use constants from the
  100. `stat` module for file permission's modes
  101. """
  102. mode = stat(filepath)[ST_MODE]
  103. if not mode & desired_mode:
  104. chmod(filepath, mode | desired_mode)
  105. # XXX (syt) unused? kill?
  106. class ProtectedFile(FileIO):
  107. """A special file-object class that automatically does a 'chmod +w' when
  108. needed.
  109. XXX: for now, the way it is done allows 'normal file-objects' to be
  110. created during the ProtectedFile object lifetime.
  111. One way to circumvent this would be to chmod / unchmod on each
  112. write operation.
  113. One other way would be to :
  114. - catch the IOError in the __init__
  115. - if IOError, then create a StringIO object
  116. - each write operation writes in this StringIO object
  117. - on close()/del(), write/append the StringIO content to the file and
  118. do the chmod only once
  119. """
  120. def __init__(self, filepath, mode):
  121. self.original_mode = stat(filepath)[ST_MODE]
  122. self.mode_changed = False
  123. if mode in ('w', 'a', 'wb', 'ab'):
  124. if not self.original_mode & S_IWRITE:
  125. chmod(filepath, self.original_mode | S_IWRITE)
  126. self.mode_changed = True
  127. FileIO.__init__(self, filepath, mode)
  128. def _restore_mode(self):
  129. """restores the original mode if needed"""
  130. if self.mode_changed:
  131. chmod(self.name, self.original_mode)
  132. # Don't re-chmod in case of several restore
  133. self.mode_changed = False
  134. def close(self):
  135. """restore mode before closing"""
  136. self._restore_mode()
  137. FileIO.close(self)
  138. def __del__(self):
  139. if not self.closed:
  140. self.close()
  141. class UnresolvableError(Exception):
  142. """Exception raised by relative path when it's unable to compute relative
  143. path between two paths.
  144. """
  145. def relative_path(from_file, to_file):
  146. """Try to get a relative path from `from_file` to `to_file`
  147. (path will be absolute if to_file is an absolute file). This function
  148. is useful to create link in `from_file` to `to_file`. This typical use
  149. case is used in this function description.
  150. If both files are relative, they're expected to be relative to the same
  151. directory.
  152. >>> relative_path( from_file='toto/index.html', to_file='index.html')
  153. '../index.html'
  154. >>> relative_path( from_file='index.html', to_file='toto/index.html')
  155. 'toto/index.html'
  156. >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
  157. '../toto/index.html'
  158. >>> relative_path( from_file='toto/index.html', to_file='/index.html')
  159. '/index.html'
  160. >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
  161. '../index.html'
  162. >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
  163. 'summary.html'
  164. >>> relative_path( from_file='index.html', to_file='index.html')
  165. ''
  166. >>> relative_path( from_file='/index.html', to_file='toto/index.html')
  167. Traceback (most recent call last):
  168. File "<string>", line 1, in ?
  169. File "<stdin>", line 37, in relative_path
  170. UnresolvableError
  171. >>> relative_path( from_file='/index.html', to_file='/index.html')
  172. ''
  173. >>>
  174. :type from_file: str
  175. :param from_file: source file (where links will be inserted)
  176. :type to_file: str
  177. :param to_file: target file (on which links point)
  178. :raise UnresolvableError: if it has been unable to guess a correct path
  179. :rtype: str
  180. :return: the relative path of `to_file` from `from_file`
  181. """
  182. from_file = normpath(from_file)
  183. to_file = normpath(to_file)
  184. if from_file == to_file:
  185. return ''
  186. if isabs(to_file):
  187. if not isabs(from_file):
  188. return to_file
  189. elif isabs(from_file):
  190. raise UnresolvableError()
  191. from_parts = from_file.split(sep)
  192. to_parts = to_file.split(sep)
  193. idem = 1
  194. result = []
  195. while len(from_parts) > 1:
  196. dirname = from_parts.pop(0)
  197. if idem and len(to_parts) > 1 and dirname == to_parts[0]:
  198. to_parts.pop(0)
  199. else:
  200. idem = 0
  201. result.append('..')
  202. result += to_parts
  203. return sep.join(result)
  204. def norm_read(path):
  205. """Return the content of the file with normalized line feeds.
  206. :type path: str
  207. :param path: path to the file to read
  208. :rtype: str
  209. :return: the content of the file with normalized line feeds
  210. """
  211. return open(path, 'U').read()
  212. norm_read = deprecated("use \"open(path, 'U').read()\"")(norm_read)
  213. def norm_open(path):
  214. """Return a stream for a file with content with normalized line feeds.
  215. :type path: str
  216. :param path: path to the file to open
  217. :rtype: file or StringIO
  218. :return: the opened file with normalized line feeds
  219. """
  220. return open(path, 'U')
  221. norm_open = deprecated("use \"open(path, 'U')\"")(norm_open)
  222. def lines(path, comments=None):
  223. """Return a list of non empty lines in the file located at `path`.
  224. :type path: str
  225. :param path: path to the file
  226. :type comments: str or None
  227. :param comments:
  228. optional string which can be used to comment a line in the file
  229. (i.e. lines starting with this string won't be returned)
  230. :rtype: list
  231. :return:
  232. a list of stripped line in the file, without empty and commented
  233. lines
  234. :warning: at some point this function will probably return an iterator
  235. """
  236. stream = open(path, 'U')
  237. result = stream_lines(stream, comments)
  238. stream.close()
  239. return result
  240. def stream_lines(stream, comments=None):
  241. """Return a list of non empty lines in the given `stream`.
  242. :type stream: object implementing 'xreadlines' or 'readlines'
  243. :param stream: file like object
  244. :type comments: str or None
  245. :param comments:
  246. optional string which can be used to comment a line in the file
  247. (i.e. lines starting with this string won't be returned)
  248. :rtype: list
  249. :return:
  250. a list of stripped line in the file, without empty and commented
  251. lines
  252. :warning: at some point this function will probably return an iterator
  253. """
  254. try:
  255. readlines = stream.xreadlines
  256. except AttributeError:
  257. readlines = stream.readlines
  258. result = []
  259. for line in readlines():
  260. line = line.strip()
  261. if line and (comments is None or not line.startswith(comments)):
  262. result.append(line)
  263. return result
  264. def export(from_dir, to_dir,
  265. blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
  266. verbose=0):
  267. """Make a mirror of `from_dir` in `to_dir`, omitting directories and
  268. files listed in the black list or ending with one of the given
  269. extensions.
  270. :type from_dir: str
  271. :param from_dir: directory to export
  272. :type to_dir: str
  273. :param to_dir: destination directory
  274. :type blacklist: list or tuple
  275. :param blacklist:
  276. list of files or directories to ignore, default to the content of
  277. `BASE_BLACKLIST`
  278. :type ignore_ext: list or tuple
  279. :param ignore_ext:
  280. list of extensions to ignore, default to the content of
  281. `IGNORED_EXTENSIONS`
  282. :type verbose: bool
  283. :param verbose:
  284. flag indicating whether information about exported files should be
  285. printed to stderr, default to False
  286. """
  287. try:
  288. mkdir(to_dir)
  289. except OSError:
  290. pass # FIXME we should use "exists" if the point is about existing dir
  291. # else (permission problems?) shouldn't return / raise ?
  292. for directory, dirnames, filenames in walk(from_dir):
  293. for norecurs in blacklist:
  294. try:
  295. dirnames.remove(norecurs)
  296. except ValueError:
  297. continue
  298. for dirname in dirnames:
  299. src = join(directory, dirname)
  300. dest = to_dir + src[len(from_dir):]
  301. if isdir(src):
  302. if not exists(dest):
  303. mkdir(dest)
  304. for filename in filenames:
  305. # don't include binary files
  306. # endswith does not accept tuple in 2.4
  307. if any([filename.endswith(ext) for ext in ignore_ext]):
  308. continue
  309. src = join(directory, filename)
  310. dest = to_dir + src[len(from_dir):]
  311. if verbose:
  312. print >> sys.stderr, src, '->', dest
  313. if exists(dest):
  314. remove(dest)
  315. shutil.copy2(src, dest)
  316. def remove_dead_links(directory, verbose=0):
  317. """Recursively traverse directory and remove all dead links.
  318. :type directory: str
  319. :param directory: directory to cleanup
  320. :type verbose: bool
  321. :param verbose:
  322. flag indicating whether information about deleted links should be
  323. printed to stderr, default to False
  324. """
  325. for dirpath, dirname, filenames in walk(directory):
  326. for filename in dirnames + filenames:
  327. src = join(dirpath, filename)
  328. if islink(src) and not exists(src):
  329. if verbose:
  330. print 'remove dead link', src
  331. remove(src)