PEEL Shopping
Open source ecommerce : PEEL Shopping
tcpdf_parser.php
Go to the documentation of this file.
1 <?php
2 //============================================================+
3 // File name : tcpdf_parser.php
4 // Version : 1.0.001
5 // Begin : 2011-05-23
6 // Last Update : 2012-05-03
7 // Author : Nicola Asuni - Tecnick.com LTD - Manor Coach House, Church Hill, Aldershot, Hants, GU12 4RQ, UK - www.tecnick.com - info@tecnick.com
8 // License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
9 // -------------------------------------------------------------------
10 // Copyright (C) 2011-2012 Nicola Asuni - Tecnick.com LTD
11 //
12 // This file is part of TCPDF software library.
13 //
14 // TCPDF is free software: you can redistribute it and/or modify it
15 // under the terms of the GNU Lesser General Public License as
16 // published by the Free Software Foundation, either version 3 of the
17 // License, or (at your option) any later version.
18 //
19 // TCPDF is distributed in the hope that it will be useful, but
20 // WITHOUT ANY WARRANTY; without even the implied warranty of
21 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22 // See the GNU Lesser General Public License for more details.
23 //
24 // You should have received a copy of the License
25 // along with TCPDF. If not, see
26 // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
27 //
28 // See LICENSE.TXT file for more information.
29 // -------------------------------------------------------------------
30 //
31 // Description : This is a PHP class for parsing PDF documents.
32 //
33 //============================================================+
34 
43 // include class for decoding filters
44 require_once(dirname(__FILE__).'/tcpdf_filters.php');
45 
/**
 * This is a PHP class for parsing PDF documents.
 *
 * The constructor parses the whole document: it reads the cross-reference
 * table(s) and trailer, then parses every object listed in the table.
 * The result is retrieved with getParsedData().
 */
class TCPDF_PARSER {

    /**
     * Raw content of the PDF document (emptied once the constructor has finished parsing).
     */
    private $pdfdata = '';

    /**
     * XREF data.
     * $xref['xref'] maps "[object number]_[generation number]" to the byte offset of the object;
     * $xref['trailer'] holds the trailer entries that were found (size, root, encrypt, info, id).
     */
    protected $xref = array();

    /**
     * Array of PDF objects, indexed by "[object number]_[generation number]".
     */
    protected $objects = array();

    /**
     * Instance of TCPDF_FILTERS used to decode stream filters.
     */
    private $FilterDecoders;

// -----------------------------------------------------------------------------

    /**
     * Parse a PDF document and store the resulting xref data and objects.
     *
     * @param $data (string) PDF data to parse. Error() is called when it is empty.
     */
    public function __construct($data) {
        if (empty($data)) {
            $this->Error('Empty PDF data.');
        }
        $this->pdfdata = $data;
        // initialize class for decoding filters
        $this->FilterDecoders = new TCPDF_FILTERS();
        // get xref and trailer data
        $this->xref = $this->getXrefData();
        // parse all document objects
        $this->objects = array();
        // the 'xref' key is missing when the table lists no in-use ('n') entry
        if (isset($this->xref['xref'])) {
            foreach ($this->xref['xref'] as $obj => $offset) {
                // an object may already have been parsed while resolving a reference
                if (!isset($this->objects[$obj])) {
                    $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
                }
            }
        }
        // release some memory
        $this->pdfdata = '';
    }

    /**
     * Return an array of parsed PDF document objects.
     *
     * @return (array) Array with two items: the xref data and the array of parsed objects.
     */
    public function getParsedData() {
        return array($this->xref, $this->objects);
    }

    /**
     * Get xref (cross-reference table) and trailer data from PDF document data.
     *
     * When the trailer contains a Prev entry the method calls itself to merge the
     * previous table; entries and trailer values that are already set are kept.
     *
     * @param $offset (int) Position from which the startxref keyword is searched; 0 selects the last startxref of the document.
     * @param $xref (array) Previously collected xref data (used by the recursive call).
     * @return (array) Xref and trailer data.
     */
    protected function getXrefData($offset=0, $xref=array()) {
        $startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
        if ($offset == 0) {
            // find last startxref
            if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
                $this->Error('Unable to find startxref');
            }
            $matches = array_pop($matches);
            $startxref = intval($matches[1]);
        } else {
            // get the first xref at the specified offset
            if (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 0) {
                $this->Error('Unable to find startxref');
            }
            $startxref = intval($matches[1][0]);
        }
        // check xref position: the keyword must be located exactly at $startxref
        // (substr() is used so that an offset outside the data simply fails the check)
        if (substr($this->pdfdata, $startxref, 4) !== 'xref') {
            $this->Error('Unable to find xref');
        }
        // extract xref data (object indexes and offsets)
        $xoffset = $startxref + 5;
        // initialize object number
        $obj_num = 0;
        $offset = $xoffset;
        while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
            $offset = (strlen($matches[0][0]) + $matches[0][1]);
            $entry_type = $matches[3][0];
            if ($entry_type == 'n') {
                // create unique object index: [object number]_[generation number]
                $index = $obj_num.'_'.intval($matches[2][0]);
                // keep the first offset found (the most recent one, as the latest table is read first)
                if (!isset($xref['xref'][$index])) {
                    // store object offset position
                    $xref['xref'][$index] = intval($matches[1][0]);
                }
                ++$obj_num;
                $offset += 2;
            } elseif ($entry_type == 'f') {
                // free entry: only advance the object number
                ++$obj_num;
                $offset += 2;
            } else {
                // subsection header: the first number is the object number (index) of the next entry
                $obj_num = intval($matches[1][0]);
            }
        }
        // get trailer data
        if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
            $trailer_data = $matches[1][0];
            if (!isset($xref['trailer'])) {
                // get only the last updated version
                $xref['trailer'] = array();
                // parse trailer_data
                if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                    $xref['trailer']['size'] = intval($matches[1]);
                }
                // entries whose value is an indirect reference
                foreach (array('Root' => 'root', 'Encrypt' => 'encrypt', 'Info' => 'info') as $key => $name) {
                    if (preg_match('/'.$key.'[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                        $xref['trailer'][$name] = intval($matches[1]).'_'.intval($matches[2]);
                    }
                }
                if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
                    $xref['trailer']['id'] = array();
                    $xref['trailer']['id'][0] = $matches[1];
                    $xref['trailer']['id'][1] = $matches[2];
                }
            }
            if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                // get previous xref
                $xref = $this->getXrefData(intval($matches[1]), $xref);
            }
        } else {
            $this->Error('Unable to find trailer');
        }
        return $xref;
    }

    /**
     * Get object type, raw value and offset to next object.
     *
     * @param $offset (int) Object offset.
     * @return (array) Array with three items: object type, raw value and the offset
     *         just after the object. The type is an empty string when no object can be
     *         read at the given position (end of data or unrecognised content).
     */
    protected function getRawObject($offset=0) {
        $objtype = ''; // object type to be returned
        $objval = ''; // object value to be returned
        $datalen = strlen($this->pdfdata);
        if ($offset < $datalen) {
            // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
            $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
        }
        if ($offset >= $datalen) {
            // nothing left to read
            return array($objtype, $objval, $offset);
        }
        // get first char
        $char = $this->pdfdata[$offset];
        // get object type
        switch ($char) {
            case '%': { // \x25 PERCENT SIGN
                // skip comment and search for next token
                $next = strcspn($this->pdfdata, "\r\n", $offset);
                if ($next > 0) {
                    $offset += $next;
                    return $this->getRawObject($offset);
                }
                break;
            }
            case '/': { // \x2F SOLIDUS
                // name object
                $objtype = $char;
                ++$offset;
                if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
                    $objval = $matches[1]; // unescaped value
                    $offset += strlen($objval);
                }
                break;
            }
            case '(': // \x28 LEFT PARENTHESIS
            case ')': { // \x29 RIGHT PARENTHESIS
                // literal string object
                $objtype = $char;
                ++$offset;
                $strpos = $offset;
                if ($char == '(') {
                    // scan up to the matching closing parenthesis
                    $open_bracket = 1;
                    while ($open_bracket > 0) {
                        if (!isset($this->pdfdata[$strpos])) {
                            break;
                        }
                        $ch = $this->pdfdata[$strpos];
                        switch ($ch) {
                            case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
                                // skip next character
                                ++$strpos;
                                break;
                            }
                            case '(': { // LEFT PARENTHESIS (28h)
                                ++$open_bracket;
                                break;
                            }
                            case ')': { // RIGHT PARENTHESIS (29h)
                                --$open_bracket;
                                break;
                            }
                        }
                        ++$strpos;
                    }
                    $objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
                    $offset = $strpos;
                }
                break;
            }
            case '[': // \x5B LEFT SQUARE BRACKET
            case ']': { // \x5D RIGHT SQUARE BRACKET
                // array object
                $objtype = $char;
                ++$offset;
                if ($char == '[') {
                    // get array content
                    $objval = array();
                    do {
                        $previous = $offset;
                        // get element
                        $element = $this->getRawObject($offset);
                        $offset = $element[2];
                        $objval[] = $element;
                        // stop at the closing delimiter, or when no progress is made (truncated or unrecognised data)
                    } while (($element[0] != ']') && ($offset > $previous));
                    // remove closing delimiter (or the empty element that stopped the loop)
                    array_pop($objval);
                }
                break;
            }
            case '<': // \x3C LESS-THAN SIGN
            case '>': { // \x3E GREATER-THAN SIGN
                if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
                    // dictionary object
                    $objtype = $char.$char;
                    $offset += 2;
                    if ($char == '<') {
                        // get dictionary content
                        $objval = array();
                        do {
                            $previous = $offset;
                            // get element
                            $element = $this->getRawObject($offset);
                            $offset = $element[2];
                            $objval[] = $element;
                            // stop at the closing delimiter, or when no progress is made (truncated or unrecognised data)
                        } while (($element[0] != '>>') && ($offset > $previous));
                        // remove closing delimiter (or the empty element that stopped the loop)
                        array_pop($objval);
                    }
                } else {
                    // hexadecimal string object
                    $objtype = $char;
                    ++$offset;
                    // \G anchors the match at $offset without copying the remaining data
                    if (($char == '<') && (preg_match('/\G([0-9A-Fa-f]+)[>]/iU', $this->pdfdata, $matches, 0, $offset) == 1)) {
                        $objval = $matches[1];
                        $offset += strlen($matches[0]);
                    }
                }
                break;
            }
            default: {
                if (substr($this->pdfdata, $offset, 6) == 'endobj') {
                    // indirect object
                    $objtype = 'endobj';
                    $offset += 6;
                } elseif (substr($this->pdfdata, $offset, 4) == 'null') {
                    // null object
                    $objtype = 'null';
                    $offset += 4;
                    $objval = 'null';
                } elseif (substr($this->pdfdata, $offset, 4) == 'true') {
                    // boolean true object
                    $objtype = 'boolean';
                    $offset += 4;
                    $objval = 'true';
                } elseif (substr($this->pdfdata, $offset, 5) == 'false') {
                    // boolean false object
                    $objtype = 'boolean';
                    $offset += 5;
                    $objval = 'false';
                } elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
                    // start stream object
                    $objtype = 'stream';
                    $offset += 6;
                    // consume the whole end-of-line marker that follows the keyword, so that the
                    // value starts at the first byte of the stream data for both CRLF and LF
                    if (preg_match('/\G(?:\r\n|\r|\n)/', $this->pdfdata, $matches, 0, $offset) == 1) {
                        $datastart = $offset + strlen($matches[0]);
                        $dataend = stripos($this->pdfdata, 'endstream', $datastart);
                        if ($dataend !== false) {
                            // line breaks placed just before the endstream keyword are not part of the value
                            $objval = rtrim(substr($this->pdfdata, $datastart, ($dataend - $datastart)), "\r\n");
                            $offset = $dataend + 9;
                        }
                    }
                } elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
                    // end stream object
                    $objtype = 'endstream';
                    $offset += 9;
                } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                    // indirect object reference
                    $objtype = 'objref';
                    $offset += strlen($matches[0]);
                    $objval = intval($matches[1]).'_'.intval($matches[2]);
                } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                    // object start
                    $objtype = 'obj';
                    $objval = intval($matches[1]).'_'.intval($matches[2]);
                    $offset += strlen($matches[0]);
                } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
                    // numeric object
                    $objtype = 'numeric';
                    $objval = substr($this->pdfdata, $offset, $numlen);
                    $offset += $numlen;
                }
                break;
            }
        }
        return array($objtype, $objval, $offset);
    }

    /**
     * Get content of indirect object.
     *
     * @param $obj_ref (string) Object number and generation number separated by underscore character.
     * @param $offset (int) Object offset.
     * @param $decoding (boolean) If true, streams are decoded using the dictionary that precedes them;
     *        the result of decodeStream() is stored at index 3 of the stream element.
     * @return (array) Array of the elements contained in the object, or array('null', 'null', $offset)
     *         when the object header is not found at $offset.
     */
    protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
        $obj = explode('_', $obj_ref);
        if (count($obj) != 2) {
            $this->Error('Invalid object reference: '.$obj_ref);
            return;
        }
        $objref = $obj[0].' '.$obj[1].' obj';
        // the object header must be located exactly at $offset
        if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
            // an indirect reference to an undefined object shall be considered a reference to the null object
            return array('null', 'null', $offset);
        }
        // starting position of object content
        $offset += strlen($objref);
        // get array of object content
        $objdata = array();
        $i = 0; // object main index
        do {
            $previous = $offset;
            // get element
            $element = $this->getRawObject($offset);
            $offset = $element[2];
            // decode stream using stream's dictionary information
            if ($decoding && ($element[0] == 'stream') && isset($objdata[($i - 1)][0]) && ($objdata[($i - 1)][0] == '<<')) {
                $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
            }
            $objdata[$i] = $element;
            ++$i;
            // stop at the closing keyword, or when no progress is made (truncated or unrecognised data)
        } while (($element[0] != 'endobj') && ($offset > $previous));
        // remove closing delimiter (or the empty element that stopped the loop)
        array_pop($objdata);
        // return raw object content
        return $objdata;
    }

    /**
     * Get the content of object, resolving indirect object reference if necessary.
     *
     * @param $obj (array) Object element as returned by getRawObject().
     * @return (array) The referenced object content (array of elements) when $obj is a reference
     *         to a known object, otherwise $obj itself.
     */
    protected function getObjectVal($obj) {
        if ($obj[0] == 'objref') {
            // reference to indirect object
            if (isset($this->objects[$obj[1]])) {
                // this object has been already parsed
                return $this->objects[$obj[1]];
            } elseif (isset($this->xref['xref'][$obj[1]])) {
                // parse new object (object offsets are stored under the 'xref' key)
                $this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
                return $this->objects[$obj[1]];
            }
        }
        return $obj;
    }

    /**
     * Decode the specified stream.
     *
     * @param $sdic (array) Stream's dictionary array (list of elements as returned by getRawObject()).
     * @param $stream (string) Stream to decode.
     * @return (array) Array with two items: the decoded stream data and the list of filters that could not be applied.
     */
    protected function decodeStream($sdic, $stream) {
        // get stream length and filters
        $slength = strlen($stream);
        $filters = array();
        foreach ($sdic as $k => $v) {
            if ($v[0] == '/') {
                if (($v[1] == 'Length') && isset($sdic[($k + 1)]) && ($sdic[($k + 1)][0] == 'numeric')) {
                    // get declared stream length
                    $declength = intval($sdic[($k + 1)][1]);
                    if ($declength < $slength) {
                        $stream = substr($stream, 0, $declength);
                        $slength = $declength;
                    }
                } elseif (($v[1] == 'Filter') && isset($sdic[($k + 1)])) {
                    // resolve indirect object
                    $objval = $this->getObjectVal($sdic[($k + 1)]);
                    if (isset($objval[0]) && is_array($objval[0])) {
                        // a resolved reference is a list of elements: the filter is its first element
                        $objval = $objval[0];
                    }
                    $valtype = isset($objval[0]) ? $objval[0] : '';
                    if ($valtype == '/') {
                        // single filter
                        $filters[] = $objval[1];
                    } elseif ($valtype == '[') {
                        // array of filters
                        foreach ($objval[1] as $flt) {
                            if ($flt[0] == '/') {
                                $filters[] = $flt[1];
                            }
                        }
                    }
                }
            }
        }
        // decode the stream
        $remaining_filters = array();
        $available_filters = $this->FilterDecoders->getAvailableFilters();
        foreach ($filters as $filter) {
            if (in_array($filter, $available_filters)) {
                $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
            } else {
                // add missing filter to array
                $remaining_filters[] = $filter;
            }
        }
        return array($stream, $remaining_filters);
    }

    /**
     * This method is called in case of fatal error; it simply outputs the message and halts the execution.
     *
     * @param $msg (string) The error message.
     */
    public function Error($msg) {
        // exit program and print error
        die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
    }

} // END OF TCPDF_PARSER CLASS
507 
508 //============================================================+
509 // END OF FILE
510 //============================================================+
foreach(array('date1', 'date2', 'type', 'renewals', 'width') as $item) $data
Definition: chart-data.php:29
getXrefData($offset=0, $xref=array())
Get xref (cross-reference table) and trailer data from PDF document data.
This is a PHP class for parsing PDF documents.
getParsedData()
Return an array of parsed PDF document objects.
This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).
decodeStream($sdic, $stream)
Decode the specified stream.
getRawObject($offset=0)
Get object type, raw value and offset to next object.
if(strlen($date2)== '10') if($type== 'users-by-age'&&a_priv('admin_users', true)) elseif($type== 'forums-count'&&a_priv('admin_content', true)) elseif($type== 'forums-categories'&&a_priv('admin_content', true)) elseif($type== 'users-count'&&a_priv('admin_users', true)) elseif($type== 'product-categories'&&a_priv('admin_products', true)) elseif($type== 'users-by-sex'&&a_priv('admin_users', true)) elseif($type== 'users-by-country'&&a_priv('admin_users', true)) elseif($type== 'sales'&&a_priv('admin_sales', true))
Definition: chart-data.php:160
__construct($data)
Parse a PDF document and return an array of objects.
Error($msg)
This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
$xref
XREF data.
getIndirectObject($obj_ref, $offset=0, $decoding=true)
Get content of indirect object.
$objects
Array of PDF objects.
getObjectVal($obj)
Get the content of object, resolving indirect object reference if necessary.

This documentation for Open ecommerce PEEL Shopping and PEEL.fr has been generated by Doxygen on Thu Oct 15 2015 14:30:04 - Peel ecommerce is a product of Agence web Advisto SAS. All rights reserved.