pdf_ext.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
  2. # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
  3. #
  4. # This file is part of logilab-common.
  5. #
  6. # logilab-common is free software: you can redistribute it and/or modify it under
  7. # the terms of the GNU Lesser General Public License as published by the Free
  8. # Software Foundation, either version 2.1 of the License, or (at your option) any
  9. # later version.
  10. #
  11. # logilab-common is distributed in the hope that it will be useful, but WITHOUT
  12. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13. # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  14. # details.
  15. #
  16. # You should have received a copy of the GNU Lesser General Public License along
  17. # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
"""Manipulate pdf and fdf files (pdftk recommended).

Notes regarding pdftk, pdf forms and fdf files (Forms Data Format):

field names can be extracted with::

    pdftk orig.pdf generate_fdf output truc.fdf

to merge fdf and pdf::

    pdftk orig.pdf fill_form test.fdf output result.pdf [flatten]

without flatten, one can further edit the resulting form.
with flatten, everything is turned into text.
"""
  27. __docformat__ = "restructuredtext en"
  28. # XXX seems very unix specific
  29. # TODO: check availability of pdftk at import
import os
import subprocess
import tempfile
  31. HEAD="""%FDF-1.2
  32. %\xE2\xE3\xCF\xD3
  33. 1 0 obj
  34. <<
  35. /FDF
  36. <<
  37. /Fields [
  38. """
  39. TAIL="""]
  40. >>
  41. >>
  42. endobj
  43. trailer
  44. <<
  45. /Root 1 0 R
  46. >>
  47. %%EOF
  48. """
  49. def output_field( f ):
  50. return "\xfe\xff" + "".join( [ "\x00"+c for c in f ] )
  51. def extract_keys(lines):
  52. keys = []
  53. for line in lines:
  54. if line.startswith('/V'):
  55. pass #print 'value',line
  56. elif line.startswith('/T'):
  57. key = line[7:-2]
  58. key = ''.join(key.split('\x00'))
  59. keys.append( key )
  60. return keys
  61. def write_field(out, key, value):
  62. out.write("<<\n")
  63. if value:
  64. out.write("/V (%s)\n" %value)
  65. else:
  66. out.write("/V /\n")
  67. out.write("/T (%s)\n" % output_field(key) )
  68. out.write(">> \n")
  69. def write_fields(out, fields):
  70. out.write(HEAD)
  71. for (key, value, comment) in fields:
  72. write_field(out, key, value)
  73. write_field(out, key+"a", value) # pour copie-carbone sur autres pages
  74. out.write(TAIL)
  75. def extract_keys_from_pdf(filename):
  76. # what about using 'pdftk filename dump_data_fields' and parsing the output ?
  77. os.system('pdftk %s generate_fdf output /tmp/toto.fdf' % filename)
  78. lines = file('/tmp/toto.fdf').readlines()
  79. return extract_keys(lines)
  80. def fill_pdf(infile, outfile, fields):
  81. write_fields(file('/tmp/toto.fdf', 'w'), fields)
  82. os.system('pdftk %s fill_form /tmp/toto.fdf output %s flatten' % (infile, outfile))
  83. def testfill_pdf(infile, outfile):
  84. keys = extract_keys_from_pdf(infile)
  85. fields = []
  86. for key in keys:
  87. fields.append( (key, key, '') )
  88. fill_pdf(infile, outfile, fields)