1 <?php
  2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43 
 44 
 45  46  47  48  49  50  51  52  53 
 54 class SimplePie_Parser
 55 {
 56     var $error_code;
 57     var $error_string;
 58     var $current_line;
 59     var $current_column;
 60     var $current_byte;
 61     var $separator = ' ';
 62     var $namespace = array('');
 63     var $element = array('');
 64     var $xml_base = array('');
 65     var $xml_base_explicit = array(false);
 66     var $xml_lang = array('');
 67     var $data = array();
 68     var $datas = array(array());
 69     var $current_xhtml_construct = -1;
 70     var $encoding;
 71     protected $registry;
 72 
 73     public function set_registry(SimplePie_Registry $registry)
 74     {
 75         $this->registry = $registry;
 76     }
 77 
 78     public function parse(&$data, $encoding)
 79     {
 80         
 81         if (strtoupper($encoding) === 'US-ASCII')
 82         {
 83             $this->encoding = 'UTF-8';
 84         }
 85         else
 86         {
 87             $this->encoding = $encoding;
 88         }
 89 
 90         
 91         
 92         if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
 93         {
 94             $data = substr($data, 4);
 95         }
 96         
 97         elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
 98         {
 99             $data = substr($data, 4);
100         }
101         
102         elseif (substr($data, 0, 2) === "\xFE\xFF")
103         {
104             $data = substr($data, 2);
105         }
106         
107         elseif (substr($data, 0, 2) === "\xFF\xFE")
108         {
109             $data = substr($data, 2);
110         }
111         
112         elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
113         {
114             $data = substr($data, 3);
115         }
116 
117         if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
118         {
119             $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
120             if ($declaration->parse())
121             {
122                 $data = substr($data, $pos + 2);
123                 $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
124             }
125             else
126             {
127                 $this->error_string = 'SimplePie bug! Please report this!';
128                 return false;
129             }
130         }
131 
132         $return = true;
133 
134         static $xml_is_sane = null;
135         if ($xml_is_sane === null)
136         {
137             $parser_check = xml_parser_create();
138             xml_parse_into_struct($parser_check, '<foo>&</foo>', $values);
139             xml_parser_free($parser_check);
140             $xml_is_sane = isset($values[0]['value']);
141         }
142 
143         
144         if ($xml_is_sane)
145         {
146             $xml = xml_parser_create_ns($this->encoding, $this->separator);
147             xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
148             xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
149             xml_set_object($xml, $this);
150             xml_set_character_data_handler($xml, 'cdata');
151             xml_set_element_handler($xml, 'tag_open', 'tag_close');
152 
153             
154             if (!xml_parse($xml, $data, true))
155             {
156                 $this->error_code = xml_get_error_code($xml);
157                 $this->error_string = xml_error_string($this->error_code);
158                 $return = false;
159             }
160             $this->current_line = xml_get_current_line_number($xml);
161             $this->current_column = xml_get_current_column_number($xml);
162             $this->current_byte = xml_get_current_byte_index($xml);
163             xml_parser_free($xml);
164             return $return;
165         }
166         else
167         {
168             libxml_clear_errors();
169             $xml = new XMLReader();
170             $xml->xml($data);
171             while (@$xml->read())
172             {
173                 switch ($xml->nodeType)
174                 {
175 
176                     case constant('XMLReader::END_ELEMENT'):
177                         if ($xml->namespaceURI !== '')
178                         {
179                             $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
180                         }
181                         else
182                         {
183                             $tagName = $xml->localName;
184                         }
185                         $this->tag_close(null, $tagName);
186                         break;
187                     case constant('XMLReader::ELEMENT'):
188                         $empty = $xml->isEmptyElement;
189                         if ($xml->namespaceURI !== '')
190                         {
191                             $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
192                         }
193                         else
194                         {
195                             $tagName = $xml->localName;
196                         }
197                         $attributes = array();
198                         while ($xml->moveToNextAttribute())
199                         {
200                             if ($xml->namespaceURI !== '')
201                             {
202                                 $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
203                             }
204                             else
205                             {
206                                 $attrName = $xml->localName;
207                             }
208                             $attributes[$attrName] = $xml->value;
209                         }
210                         $this->tag_open(null, $tagName, $attributes);
211                         if ($empty)
212                         {
213                             $this->tag_close(null, $tagName);
214                         }
215                         break;
216                     case constant('XMLReader::TEXT'):
217 
218                     case constant('XMLReader::CDATA'):
219                         $this->cdata(null, $xml->value);
220                         break;
221                 }
222             }
223             if ($error = libxml_get_last_error())
224             {
225                 $this->error_code = $error->code;
226                 $this->error_string = $error->message;
227                 $this->current_line = $error->line;
228                 $this->current_column = $error->column;
229                 return false;
230             }
231             else
232             {
233                 return true;
234             }
235         }
236     }
237 
238     public function get_error_code()
239     {
240         return $this->error_code;
241     }
242 
243     public function get_error_string()
244     {
245         return $this->error_string;
246     }
247 
248     public function get_current_line()
249     {
250         return $this->current_line;
251     }
252 
253     public function get_current_column()
254     {
255         return $this->current_column;
256     }
257 
258     public function get_current_byte()
259     {
260         return $this->current_byte;
261     }
262 
263     public function get_data()
264     {
265         return $this->data;
266     }
267 
268     public function tag_open($parser, $tag, $attributes)
269     {
270         list($this->namespace[], $this->element[]) = $this->split_ns($tag);
271 
272         $attribs = array();
273         foreach ($attributes as $name => $value)
274         {
275             list($attrib_namespace, $attribute) = $this->split_ns($name);
276             $attribs[$attrib_namespace][$attribute] = $value;
277         }
278 
279         if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
280         {
281             $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
282             if ($base !== false)
283             {
284                 $this->xml_base[] = $base;
285                 $this->xml_base_explicit[] = true;
286             }
287         }
288         else
289         {
290             $this->xml_base[] = end($this->xml_base);
291             $this->xml_base_explicit[] = end($this->xml_base_explicit);
292         }
293 
294         if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
295         {
296             $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
297         }
298         else
299         {
300             $this->xml_lang[] = end($this->xml_lang);
301         }
302 
303         if ($this->current_xhtml_construct >= 0)
304         {
305             $this->current_xhtml_construct++;
306             if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
307             {
308                 $this->data['data'] .= '<' . end($this->element);
309                 if (isset($attribs['']))
310                 {
311                     foreach ($attribs[''] as $name => $value)
312                     {
313                         $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
314                     }
315                 }
316                 $this->data['data'] .= '>';
317             }
318         }
319         else
320         {
321             $this->datas[] =& $this->data;
322             $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
323             $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
324             if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
325             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
326             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
327             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
328             || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
329             {
330                 $this->current_xhtml_construct = 0;
331             }
332         }
333     }
334 
335     public function cdata($parser, $cdata)
336     {
337         if ($this->current_xhtml_construct >= 0)
338         {
339             $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
340         }
341         else
342         {
343             $this->data['data'] .= $cdata;
344         }
345     }
346 
347     public function tag_close($parser, $tag)
348     {
349         if ($this->current_xhtml_construct >= 0)
350         {
351             $this->current_xhtml_construct--;
352             if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
353             {
354                 $this->data['data'] .= '</' . end($this->element) . '>';
355             }
356         }
357         if ($this->current_xhtml_construct === -1)
358         {
359             $this->data =& $this->datas[count($this->datas) - 1];
360             array_pop($this->datas);
361         }
362 
363         array_pop($this->element);
364         array_pop($this->namespace);
365         array_pop($this->xml_base);
366         array_pop($this->xml_base_explicit);
367         array_pop($this->xml_lang);
368     }
369 
370     public function split_ns($string)
371     {
372         static $cache = array();
373         if (!isset($cache[$string]))
374         {
375             if ($pos = strpos($string, $this->separator))
376             {
377                 static $separator_length;
378                 if (!$separator_length)
379                 {
380                     $separator_length = strlen($this->separator);
381                 }
382                 $namespace = substr($string, 0, $pos);
383                 $local_name = substr($string, $pos + $separator_length);
384                 if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
385                 {
386                     $namespace = SIMPLEPIE_NAMESPACE_ITUNES;
387                 }
388 
389                 
390                 if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
391                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
392                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
393                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
394                     $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
395                 {
396                     $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
397                 }
398                 $cache[$string] = array($namespace, $local_name);
399             }
400             else
401             {
402                 $cache[$string] = array('', $string);
403             }
404         }
405         return $cache[$string];
406     }
407 }
408