source: XmlTools2/trunk/libs/pugixml.hpp@ 967

Last change on this file since 967 was 906, checked in by s10k, 11 years ago
File size: 43.3 KB
RevLine 
[906]1/**
2 * pugixml parser - version 1.2
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14#ifndef PUGIXML_VERSION
15// Define version macro as a single integer so that it's safe to use in less-than comparisons.
// The value 120 stands for version 1.2 (see the file header): major * 100, with the minor digit in the tens place.
// The #ifndef guard means a value defined before this point is left untouched.
16# define PUGIXML_VERSION 120
17#endif
18
19// Include user configuration file (this can define various configuration macros)
20#include "pugiconfig.hpp"
21
22#ifndef HEADER_PUGIXML_HPP
23#define HEADER_PUGIXML_HPP
24
25// Include stddef.h for size_t and ptrdiff_t
26#include <stddef.h>
27
28// Include exception header for XPath
29#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
30# include <exception>
31#endif
32
33// Include STL headers
34#ifndef PUGIXML_NO_STL
35# include <iterator>
36# include <iosfwd>
37# include <string>
38#endif
39
40// Macro for marking deprecated features; expands to the compiler-specific deprecation attribute.
// A definition made earlier (e.g. by the user configuration file included above) takes precedence.
41#ifndef PUGIXML_DEPRECATED
// GCC-compatible compilers: attribute syntax
42# if defined(__GNUC__)
43# define PUGIXML_DEPRECATED __attribute__((deprecated))
// Microsoft compilers with _MSC_VER >= 1300: declspec syntax
44# elif defined(_MSC_VER) && _MSC_VER >= 1300
45# define PUGIXML_DEPRECATED __declspec(deprecated)
// Any other compiler: the macro expands to nothing
46# else
47# define PUGIXML_DEPRECATED
48# endif
49#endif
50
51// If no API is defined, assume default (the macro expands to nothing).
// PUGIXML_API is the fallback for both PUGIXML_CLASS and PUGIXML_FUNCTION below.
52#ifndef PUGIXML_API
53# define PUGIXML_API
54#endif
55
56// If no API for classes is defined, reuse PUGIXML_API. This macro decorates class declarations in this header.
57#ifndef PUGIXML_CLASS
58# define PUGIXML_CLASS PUGIXML_API
59#endif
60
61// If no API for functions is defined, reuse PUGIXML_API. This macro decorates free function declarations in this header.
62#ifndef PUGIXML_FUNCTION
63# define PUGIXML_FUNCTION PUGIXML_API
64#endif
65
66// Character interface macros.
// PUGIXML_TEXT(t) wraps a string/character literal so that it matches the library character type;
// PUGIXML_CHAR is that character type. Both are selected by PUGIXML_WCHAR_MODE.
67#ifdef PUGIXML_WCHAR_MODE
// Wide mode: the literal gets an L prefix via token pasting and the character type is wchar_t
68# define PUGIXML_TEXT(t) L ## t
69# define PUGIXML_CHAR wchar_t
70#else
// Default mode: the literal is used as is and the character type is char
71# define PUGIXML_TEXT(t) t
72# define PUGIXML_CHAR char
73#endif
74
// Basic character and string types of the library
75namespace pugi
76{
77 // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE (char or wchar_t, see PUGIXML_CHAR)
78 typedef PUGIXML_CHAR char_t;
79
80#ifndef PUGIXML_NO_STL
81 // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE.
 // Only available when PUGIXML_NO_STL is not defined.
82 typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
83#endif
84}
85
86// The PugiXML namespace
87namespace pugi
88{
89 // Tree node types.
 // No enumerator has an explicit initializer, so values are consecutive starting from 0 (node_null).
90 enum xml_node_type
91 {
92 node_null, // Empty (null) node handle
93 node_document, // A document tree's absolute root
94 node_element, // Element tag, i.e. '<node/>'
95 node_pcdata, // Plain character data, i.e. 'text'
96 node_cdata, // Character data, i.e. '<![CDATA[text]]>'
97 node_comment, // Comment tag, i.e. '<!-- text -->'
98 node_pi, // Processing instruction, i.e. '<?name?>'
99 node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
100 node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
101 };
102
103 // Parsing options. Each flag is a distinct bit, so flags can be combined with bitwise OR.
104
105 // Minimal parsing mode (equivalent to turning all other flags off).
106 // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
107 const unsigned int parse_minimal = 0x0000;
108
109 // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
110 const unsigned int parse_pi = 0x0001;
111
112 // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
113 const unsigned int parse_comments = 0x0002;
114
115 // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
116 const unsigned int parse_cdata = 0x0004;
117
118 // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
119 // This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
120 const unsigned int parse_ws_pcdata = 0x0008;
121
122 // This flag determines if character and entity references are expanded during parsing. This flag is on by default.
123 const unsigned int parse_escapes = 0x0010;
124
125 // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
126 const unsigned int parse_eol = 0x0020;
127
128 // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
129 const unsigned int parse_wconv_attribute = 0x0040;
130
131 // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
132 const unsigned int parse_wnorm_attribute = 0x0080;
133
134 // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
135 const unsigned int parse_declaration = 0x0100;
136
137 // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
138 const unsigned int parse_doctype = 0x0200;
139
140 // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
141 // of whitespace is added to the DOM tree.
142 // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
143 const unsigned int parse_ws_pcdata_single = 0x0400;
144
145 // The default parsing mode.
146 // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
147 // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
 // The resulting mask is 0x0074.
148 const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
149
150 // The full parsing mode.
151 // Nodes of all types are added to the DOM tree, character/reference entities are expanded,
152 // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
 // The resulting mask is 0x0377; note that parse_ws_pcdata, parse_ws_pcdata_single and parse_wnorm_attribute are not part of it.
153 const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
154
155 // These flags determine the encoding of input data for XML document.
 // The same type is also used for the output encoding of xml_node::print and xml_document::save.
156 enum xml_encoding
157 {
158 encoding_auto, // Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
159 encoding_utf8, // UTF8 encoding
160 encoding_utf16_le, // Little-endian UTF16
161 encoding_utf16_be, // Big-endian UTF16
162 encoding_utf16, // UTF16 with native endianness
163 encoding_utf32_le, // Little-endian UTF32
164 encoding_utf32_be, // Big-endian UTF32
165 encoding_utf32, // UTF32 with native endianness
166 encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32)
167 encoding_latin1 // Latin-1 encoding (presumably ISO-8859-1 - confirm against the implementation)
168 };
169
170 // Formatting flags. Each flag is a distinct bit, so flags can be combined with bitwise OR.
171
172 // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
173 const unsigned int format_indent = 0x01;
174
175 // Write encoding-specific BOM to the output stream. This flag is off by default.
176 const unsigned int format_write_bom = 0x02;
177
178 // Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
179 const unsigned int format_raw = 0x04;
180
181 // Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
182 const unsigned int format_no_declaration = 0x08;
183
184 // Don't escape attribute values and PCDATA contents. This flag is off by default.
185 const unsigned int format_no_escapes = 0x10;
186
187 // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
188 const unsigned int format_save_file_text = 0x20;
189
190 // The default set of formatting flags (only format_indent is set, i.e. 0x01).
191 // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
192 const unsigned int format_default = format_indent;
193
194 // Forward declarations.
 // The two *_struct types are only declared in this header; handles refer to them through pointers.
195 struct xml_attribute_struct;
196 struct xml_node_struct;
197
 // Iterator types
198 class xml_node_iterator;
199 class xml_attribute_iterator;
200 class xml_named_node_iterator;
201
202 class xml_tree_walker;
203
204 class xml_node;
205
206 class xml_text;
207
 // XPath types are declared only when XPath support is not disabled with PUGIXML_NO_XPATH
208 #ifndef PUGIXML_NO_XPATH
209 class xpath_node;
210 class xpath_node_set;
211 class xpath_query;
212 class xpath_variable_set;
213 #endif
214
// Range-based for loop support.
// Holds a [begin, end) pair of iterators of type It and exposes it through begin()/end(),
// which is all a range-based for loop (or any generic range algorithm) requires.
template <typename It> class xml_object_range
{
public:
	// Both typedefs name the same iterator type; 'iterator' is provided in addition to
	// 'const_iterator' so that generic code that expects Range::iterator also compiles.
	typedef It const_iterator;
	typedef It iterator;

	// Constructs a range from its first (b) and past-the-end (e) iterators
	xml_object_range(It b, It e): _begin(b), _end(e)
	{
	}

	// Returns a copy of the first iterator of the range
	It begin() const { return _begin; }

	// Returns a copy of the past-the-end iterator of the range
	It end() const { return _end; }

private:
	It _begin, _end;
};
231
232 // Writer interface for node printing (see xml_node::print).
 // Abstract base class: implementations override write() to receive the produced output.
233 class PUGIXML_CLASS xml_writer
234 {
235 public:
 // Virtual destructor so that implementations can be destroyed through a base pointer
236 virtual ~xml_writer() {}
237
238 // Write memory chunk into stream/file/whatever.
 // data points to the chunk and size is its length (presumably in bytes - confirm against the implementation).
239 virtual void write(const void* data, size_t size) = 0;
240 };
241
242 // xml_writer implementation for FILE*
243 class PUGIXML_CLASS xml_writer_file: public xml_writer
244 {
245 public:
246 // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio.
 // NOTE(review): the constructor is not explicit, so a void* converts implicitly to xml_writer_file.
247 xml_writer_file(void* file);
248
 // Overrides xml_writer::write
249 virtual void write(const void* data, size_t size);
250
251 private:
 // The FILE* passed to the constructor, stored as void* (presumably not owned by the writer - confirm)
252 void* file;
253 };
254
 // Only available when STL support is not disabled with PUGIXML_NO_STL
255 #ifndef PUGIXML_NO_STL
256 // xml_writer implementation for streams
257 class PUGIXML_CLASS xml_writer_stream: public xml_writer
258 {
259 public:
260 // Construct writer from an output stream object (either a narrow or a wide stream)
261 xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
262 xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
263
 // Overrides xml_writer::write
264 virtual void write(const void* data, size_t size);
265
266 private:
 // Target streams; presumably only the one matching the constructor that was used is non-null - confirm against the implementation
267 std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
268 std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
269 };
270 #endif
271
272 // A light-weight handle for manipulating attributes in DOM tree.
 // The only data member is a pointer to the underlying xml_attribute_struct, so copying a handle copies just that pointer.
273 class PUGIXML_CLASS xml_attribute
274 {
275 friend class xml_attribute_iterator;
276 friend class xml_node;
277
278 private:
 // Wrapped attribute pointer
279 xml_attribute_struct* _attr;
280
 // Result type of the safe bool conversion operator below
281 typedef void (*unspecified_bool_type)(xml_attribute***);
282
283 public:
284 // Default constructor. Constructs an empty attribute.
285 xml_attribute();
286
287 // Constructs attribute from internal pointer
288 explicit xml_attribute(xml_attribute_struct* attr);
289
290 // Safe bool conversion operator (lets a handle be tested in a condition)
291 operator unspecified_bool_type() const;
292
293 // Borland C++ workaround
294 bool operator!() const;
295
296 // Comparison operators (compares wrapped attribute pointers)
297 bool operator==(const xml_attribute& r) const;
298 bool operator!=(const xml_attribute& r) const;
299 bool operator<(const xml_attribute& r) const;
300 bool operator>(const xml_attribute& r) const;
301 bool operator<=(const xml_attribute& r) const;
302 bool operator>=(const xml_attribute& r) const;
303
304 // Check if attribute is empty
305 bool empty() const;
306
307 // Get attribute name/value, or "" if attribute is empty
308 const char_t* name() const;
309 const char_t* value() const;
310
311 // Get attribute value, or the default value if attribute is empty (the default for def is an empty string)
312 const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
313
314 // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty (def defaults to 0)
315 int as_int(int def = 0) const;
316 unsigned int as_uint(unsigned int def = 0) const;
317 double as_double(double def = 0) const;
318 float as_float(float def = 0) const;
319
320 // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
321 bool as_bool(bool def = false) const;
322
323 // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
324 bool set_name(const char_t* rhs);
325 bool set_value(const char_t* rhs);
326
327 // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
328 bool set_value(int rhs);
329 bool set_value(unsigned int rhs);
330 bool set_value(double rhs);
331 bool set_value(bool rhs);
332
333 // Set attribute value (equivalent to set_value without error checking); overloads mirror the set_value overloads
334 xml_attribute& operator=(const char_t* rhs);
335 xml_attribute& operator=(int rhs);
336 xml_attribute& operator=(unsigned int rhs);
337 xml_attribute& operator=(double rhs);
338 xml_attribute& operator=(bool rhs);
339
340 // Get next/previous attribute in the attribute list of the parent node
341 xml_attribute next_attribute() const;
342 xml_attribute previous_attribute() const;
343
344 // Get hash value (unique for handles to the same object)
345 size_t hash_value() const;
346
347 // Get internal pointer (the pointer wrapped by this handle)
348 xml_attribute_struct* internal_object() const;
349 };
350
351#ifdef __BORLANDC__
352 // Borland C++ workaround: logical operators taking an xml_attribute handle on the left-hand side.
 // Declared only when compiling with Borland C++.
353 bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
354 bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
355#endif
356
357 // A light-weight handle for manipulating nodes in DOM tree
358 class PUGIXML_CLASS xml_node
359 {
360 friend class xml_attribute_iterator;
361 friend class xml_node_iterator;
362 friend class xml_named_node_iterator;
363
364 protected:
365 xml_node_struct* _root;
366
367 typedef void (*unspecified_bool_type)(xml_node***);
368
369 public:
370 // Default constructor. Constructs an empty node.
371 xml_node();
372
373 // Constructs node from internal pointer
374 explicit xml_node(xml_node_struct* p);
375
376 // Safe bool conversion operator
377 operator unspecified_bool_type() const;
378
379 // Borland C++ workaround
380 bool operator!() const;
381
382 // Comparison operators (compares wrapped node pointers)
383 bool operator==(const xml_node& r) const;
384 bool operator!=(const xml_node& r) const;
385 bool operator<(const xml_node& r) const;
386 bool operator>(const xml_node& r) const;
387 bool operator<=(const xml_node& r) const;
388 bool operator>=(const xml_node& r) const;
389
390 // Check if node is empty.
391 bool empty() const;
392
393 // Get node type
394 xml_node_type type() const;
395
396 // Get node name/value, or "" if node is empty or it has no name/value
397 const char_t* name() const;
398 const char_t* value() const;
399
400 // Get attribute list
401 xml_attribute first_attribute() const;
402 xml_attribute last_attribute() const;
403
404 // Get children list
405 xml_node first_child() const;
406 xml_node last_child() const;
407
408 // Get next/previous sibling in the children list of the parent node
409 xml_node next_sibling() const;
410 xml_node previous_sibling() const;
411
412 // Get parent node
413 xml_node parent() const;
414
415 // Get root of DOM tree this node belongs to
416 xml_node root() const;
417
418 // Get text object for the current node
419 xml_text text() const;
420
421 // Get child, attribute or next/previous sibling with the specified name
422 xml_node child(const char_t* name) const;
423 xml_attribute attribute(const char_t* name) const;
424 xml_node next_sibling(const char_t* name) const;
425 xml_node previous_sibling(const char_t* name) const;
426
427 // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
428 const char_t* child_value() const;
429
430 // Get child value of child with specified name. Equivalent to child(name).child_value().
431 const char_t* child_value(const char_t* name) const;
432
433 // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
434 bool set_name(const char_t* rhs);
435 bool set_value(const char_t* rhs);
436
437 // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
438 xml_attribute append_attribute(const char_t* name);
439 xml_attribute prepend_attribute(const char_t* name);
440 xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
441 xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
442
443 // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
444 xml_attribute append_copy(const xml_attribute& proto);
445 xml_attribute prepend_copy(const xml_attribute& proto);
446 xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
447 xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
448
449 // Add child node with specified type. Returns added node, or empty node on errors.
450 xml_node append_child(xml_node_type type = node_element);
451 xml_node prepend_child(xml_node_type type = node_element);
452 xml_node insert_child_after(xml_node_type type, const xml_node& node);
453 xml_node insert_child_before(xml_node_type type, const xml_node& node);
454
455 // Add child element with specified name. Returns added node, or empty node on errors.
456 xml_node append_child(const char_t* name);
457 xml_node prepend_child(const char_t* name);
458 xml_node insert_child_after(const char_t* name, const xml_node& node);
459 xml_node insert_child_before(const char_t* name, const xml_node& node);
460
461 // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
462 xml_node append_copy(const xml_node& proto);
463 xml_node prepend_copy(const xml_node& proto);
464 xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
465 xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
466
467 // Remove specified attribute
468 bool remove_attribute(const xml_attribute& a);
469 bool remove_attribute(const char_t* name);
470
471 // Remove specified child
472 bool remove_child(const xml_node& n);
473 bool remove_child(const char_t* name);
474
475 // Find attribute using predicate. Returns first attribute for which predicate returned true.
476 template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
477 {
478 if (!_root) return xml_attribute();
479
480 for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
481 if (pred(attrib))
482 return attrib;
483
484 return xml_attribute();
485 }
486
487 // Find child node using predicate. Returns first child for which predicate returned true.
488 template <typename Predicate> xml_node find_child(Predicate pred) const
489 {
490 if (!_root) return xml_node();
491
492 for (xml_node node = first_child(); node; node = node.next_sibling())
493 if (pred(node))
494 return node;
495
496 return xml_node();
497 }
498
499 // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true.
500 template <typename Predicate> xml_node find_node(Predicate pred) const
501 {
502 if (!_root) return xml_node();
503
504 xml_node cur = first_child();
505
506 while (cur._root && cur._root != _root)
507 {
508 if (pred(cur)) return cur;
509
510 if (cur.first_child()) cur = cur.first_child();
511 else if (cur.next_sibling()) cur = cur.next_sibling();
512 else
513 {
514 while (!cur.next_sibling() && cur._root != _root) cur = cur.parent();
515
516 if (cur._root != _root) cur = cur.next_sibling();
517 }
518 }
519
520 return xml_node();
521 }
522
523 // Find child node by attribute name/value
524 xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
525 xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
526
527 #ifndef PUGIXML_NO_STL
528 // Get the absolute node path from root as a text string.
529 string_t path(char_t delimiter = '/') const;
530 #endif
531
532 // Search for a node by path consisting of node names and . or .. elements.
533 xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
534
535 // Recursively traverse subtree with xml_tree_walker
536 bool traverse(xml_tree_walker& walker);
537
538 #ifndef PUGIXML_NO_XPATH
539 // Select single node by evaluating XPath query. Returns first node from the resulting node set.
540 xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
541 xpath_node select_single_node(const xpath_query& query) const;
542
543 // Select node set by evaluating XPath query
544 xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
545 xpath_node_set select_nodes(const xpath_query& query) const;
546 #endif
547
548 // Print subtree using a writer object
549 void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
550
551 #ifndef PUGIXML_NO_STL
552 // Print subtree to stream
553 void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
554 void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
555 #endif
556
557 // Child nodes iterators
558 typedef xml_node_iterator iterator;
559
560 iterator begin() const;
561 iterator end() const;
562
563 // Attribute iterators
564 typedef xml_attribute_iterator attribute_iterator;
565
566 attribute_iterator attributes_begin() const;
567 attribute_iterator attributes_end() const;
568
569 // Range-based for support
570 xml_object_range<xml_node_iterator> children() const;
571 xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
572 xml_object_range<xml_attribute_iterator> attributes() const;
573
574 // Get node offset in parsed file/string (in char_t units) for debugging purposes
575 ptrdiff_t offset_debug() const;
576
577 // Get hash value (unique for handles to the same object)
578 size_t hash_value() const;
579
580 // Get internal pointer
581 xml_node_struct* internal_object() const;
582 };
583
584#ifdef __BORLANDC__
585 // Borland C++ workaround: logical operators taking an xml_node handle on the left-hand side.
 // Declared only when compiling with Borland C++.
586 bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
587 bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
588#endif
589
590 // A helper for working with text inside PCDATA nodes.
 // Objects are obtained from xml_node (a friend of this class); the constructor taking a node pointer is private.
591 class PUGIXML_CLASS xml_text
592 {
593 friend class xml_node;
594
 // Wrapped node pointer (private, as are all members declared before 'public:')
595 xml_node_struct* _root;
596
 // Result type of the safe bool conversion operator below
597 typedef void (*unspecified_bool_type)(xml_text***);
598
 // Constructs a text object from an internal node pointer
599 explicit xml_text(xml_node_struct* root);
600
 // Internal accessors for the data node; judging by the names, _data_new() presumably creates the node when it is missing - confirm against the implementation
601 xml_node_struct* _data_new();
602 xml_node_struct* _data() const;
603
604 public:
605 // Default constructor. Constructs an empty object.
606 xml_text();
607
608 // Safe bool conversion operator (lets an object be tested in a condition)
609 operator unspecified_bool_type() const;
610
611 // Borland C++ workaround
612 bool operator!() const;
613
614 // Check if text object is empty
615 bool empty() const;
616
617 // Get text, or "" if object is empty
618 const char_t* get() const;
619
620 // Get text, or the default value if object is empty (the default for def is an empty string)
621 const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
622
623 // Get text as a number, or the default value if conversion did not succeed or object is empty (def defaults to 0)
624 int as_int(int def = 0) const;
625 unsigned int as_uint(unsigned int def = 0) const;
626 double as_double(double def = 0) const;
627 float as_float(float def = 0) const;
628
629 // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
630 bool as_bool(bool def = false) const;
631
632 // Set text (returns false if object is empty or there is not enough memory)
633 bool set(const char_t* rhs);
634
635 // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
636 bool set(int rhs);
637 bool set(unsigned int rhs);
638 bool set(double rhs);
639 bool set(bool rhs);
640
641 // Set text (equivalent to set without error checking); overloads mirror the set overloads
642 xml_text& operator=(const char_t* rhs);
643 xml_text& operator=(int rhs);
644 xml_text& operator=(unsigned int rhs);
645 xml_text& operator=(double rhs);
646 xml_text& operator=(bool rhs);
647
648 // Get the data node (node_pcdata or node_cdata) for this object
649 xml_node data() const;
650 };
651
652#ifdef __BORLANDC__
653 // Borland C++ workaround: logical operators taking an xml_text object on the left-hand side.
 // Declared only when compiling with Borland C++.
654 bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
655 bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
656#endif
657
658 // Child node iterator (a bidirectional iterator over a collection of xml_node)
659 class PUGIXML_CLASS xml_node_iterator
660 {
661 friend class xml_node;
662
663 private:
 // Node handle the iterator currently refers to. It is mutable, presumably so that the const
 // dereference operators below can hand out a non-const reference/pointer to it - confirm against the implementation.
664 mutable xml_node _wrap;
 // Handle of the parent node
665 xml_node _parent;
666
 // Constructs an iterator from internal pointers; private, so only usable by this class and its friend xml_node
667 xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
668
669 public:
670 // Iterator traits
671 typedef ptrdiff_t difference_type;
672 typedef xml_node value_type;
673 typedef xml_node* pointer;
674 typedef xml_node& reference;
675
 // The category tag comes from <iterator>, so it is only provided when STL support is enabled
676 #ifndef PUGIXML_NO_STL
677 typedef std::bidirectional_iterator_tag iterator_category;
678 #endif
679
680 // Default constructor
681 xml_node_iterator();
682
683 // Construct an iterator which points to the specified node
684 xml_node_iterator(const xml_node& node);
685
686 // Iterator operators: comparison
687 bool operator==(const xml_node_iterator& rhs) const;
688 bool operator!=(const xml_node_iterator& rhs) const;
689
 // Dereference
690 xml_node& operator*() const;
691 xml_node* operator->() const;
692
 // Increment (prefix returns a const reference to this iterator, postfix returns a copy)
693 const xml_node_iterator& operator++();
694 xml_node_iterator operator++(int);
695
 // Decrement (prefix returns a const reference to this iterator, postfix returns a copy)
696 const xml_node_iterator& operator--();
697 xml_node_iterator operator--(int);
698 };
699
700 // Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
701 class PUGIXML_CLASS xml_attribute_iterator
702 {
703 friend class xml_node;
704
705 private:
 // Attribute handle the iterator currently refers to. It is mutable, presumably so that the const
 // dereference operators below can hand out a non-const reference/pointer to it - confirm against the implementation.
706 mutable xml_attribute _wrap;
 // Handle of the node that owns the attribute list
707 xml_node _parent;
708
 // Constructs an iterator from internal pointers; private, so only usable by this class and its friend xml_node
709 xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
710
711 public:
712 // Iterator traits
713 typedef ptrdiff_t difference_type;
714 typedef xml_attribute value_type;
715 typedef xml_attribute* pointer;
716 typedef xml_attribute& reference;
717
 // The category tag comes from <iterator>, so it is only provided when STL support is enabled
718 #ifndef PUGIXML_NO_STL
719 typedef std::bidirectional_iterator_tag iterator_category;
720 #endif
721
722 // Default constructor
723 xml_attribute_iterator();
724
725 // Construct an iterator which points to the specified attribute (parent is the node that owns it)
726 xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
727
728 // Iterator operators: comparison
729 bool operator==(const xml_attribute_iterator& rhs) const;
730 bool operator!=(const xml_attribute_iterator& rhs) const;
731
 // Dereference
732 xml_attribute& operator*() const;
733 xml_attribute* operator->() const;
734
 // Increment (prefix returns a const reference to this iterator, postfix returns a copy)
735 const xml_attribute_iterator& operator++();
736 xml_attribute_iterator operator++(int);
737
 // Decrement (prefix returns a const reference to this iterator, postfix returns a copy)
738 const xml_attribute_iterator& operator--();
739 xml_attribute_iterator operator--(int);
740 };
741
742 // Named node range helper
743 class xml_named_node_iterator
744 {
745 public:
746 // Iterator traits
747 typedef ptrdiff_t difference_type;
748 typedef xml_node value_type;
749 typedef xml_node* pointer;
750 typedef xml_node& reference;
751
752 #ifndef PUGIXML_NO_STL
753 typedef std::forward_iterator_tag iterator_category;
754 #endif
755
756 // Default constructor
757 xml_named_node_iterator();
758
759 // Construct an iterator which points to the specified node
760 xml_named_node_iterator(const xml_node& node, const char_t* name);
761
762 // Iterator operators
763 bool operator==(const xml_named_node_iterator& rhs) const;
764 bool operator!=(const xml_named_node_iterator& rhs) const;
765
766 xml_node& operator*() const;
767 xml_node* operator->() const;
768
769 const xml_named_node_iterator& operator++();
770 xml_named_node_iterator operator++(int);
771
772 private:
773 mutable xml_node _node;
774 const char_t* _name;
775 };
776
777 // Abstract tree walker class (see xml_node::traverse).
 // Users derive from it and implement for_each(); begin() and end() are virtual but not pure, so overriding them is optional.
778 class PUGIXML_CLASS xml_tree_walker
779 {
780 friend class xml_node;
781
782 private:
 // Current traversal depth, reported by depth(); presumably maintained by xml_node::traverse (xml_node is a friend) - confirm
783 int _depth;
784
785 protected:
786 // Get current traversal depth
787 int depth() const;
788
789 public:
790 xml_tree_walker();
791 virtual ~xml_tree_walker();
792
793 // Callback that is called when traversal begins
794 virtual bool begin(xml_node& node);
795
796 // Callback that is called for each node traversed (pure virtual: must be implemented by derived classes)
797 virtual bool for_each(xml_node& node) = 0;
798
799 // Callback that is called when traversal ends
800 virtual bool end(xml_node& node);
801 };
802
803 // Parsing status, returned as part of xml_parse_result object.
 // status_ok is explicitly 0; the remaining enumerators take consecutive values after it.
804 enum xml_parse_status
805 {
806 status_ok = 0, // No error
807
808 status_file_not_found, // File was not found during load_file()
809 status_io_error, // Error reading from file/stream
810 status_out_of_memory, // Could not allocate memory
811 status_internal_error, // Internal error occurred
812
813 status_unrecognized_tag, // Parser could not determine tag type
814
815 status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction
816 status_bad_comment, // Parsing error occurred while parsing comment
817 status_bad_cdata, // Parsing error occurred while parsing CDATA section
818 status_bad_doctype, // Parsing error occurred while parsing document type declaration
819 status_bad_pcdata, // Parsing error occurred while parsing PCDATA section
820 status_bad_start_element, // Parsing error occurred while parsing start element tag
821 status_bad_attribute, // Parsing error occurred while parsing element attribute
822 status_bad_end_element, // Parsing error occurred while parsing end element tag
823 status_end_element_mismatch // There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
824 };
825
826 // Parsing result: a plain struct with public fields, returned by the xml_document load functions
827 struct PUGIXML_CLASS xml_parse_result
828 {
829 // Parsing status (see xml_parse_status)
830 xml_parse_status status;
831
832 // Last parsed offset (in char_t units from start of input data)
833 ptrdiff_t offset;
834
835 // Source document encoding
836 xml_encoding encoding;
837
838 // Default constructor, initializes object to failed state
839 xml_parse_result();
840
841 // Cast to bool operator (presumably true when status is status_ok - confirm against the implementation).
 // NOTE(review): this is a plain, non-explicit conversion to bool, so a result also converts implicitly in arithmetic contexts.
842 operator bool() const;
843
844 // Get error description (always a narrow string, regardless of PUGIXML_WCHAR_MODE)
845 const char* description() const;
846 };
847
848 // Document class (DOM tree root)
849 class PUGIXML_CLASS xml_document: public xml_node
850 {
851 private:
852 char_t* _buffer;
853
854 char _memory[192];
855
856 // Non-copyable semantics
857 xml_document(const xml_document&);
858 const xml_document& operator=(const xml_document&);
859
860 void create();
861 void destroy();
862
863 xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own);
864
865 public:
866 // Default constructor, makes empty document
867 xml_document();
868
869 // Destructor, invalidates all node/attribute handles to this document
870 ~xml_document();
871
872 // Removes all nodes, leaving the empty document
873 void reset();
874
875 // Removes all nodes, then copies the entire contents of the specified document
876 void reset(const xml_document& proto);
877
878 #ifndef PUGIXML_NO_STL
879 // Load document from stream.
880 xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
881 xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
882 #endif
883
884 // Load document from zero-terminated string. No encoding conversions are applied.
885 xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
886
887 // Load document from file
888 xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
889 xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
890
891 // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
892 xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
893
894 // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
895 // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
896 xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
897
898 // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
899 // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
900 xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
901
902 // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
903 void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
904
905 #ifndef PUGIXML_NO_STL
906 // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
907 void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
908 void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
909 #endif
910
911 // Save XML to file
912 bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
913 bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
914
915 // Get document element
916 xml_node document_element() const;
917 };
918
919#ifndef PUGIXML_NO_XPATH
// Type of the value an XPath query returns
enum xpath_value_type
{
	// Unknown type (the query failed to compile)
	xpath_type_none,

	// Node set (xpath_node_set)
	xpath_type_node_set,

	// Number
	xpath_type_number,

	// String
	xpath_type_string,

	// Boolean
	xpath_type_boolean
};
929
930 // XPath parsing result
931 struct PUGIXML_CLASS xpath_parse_result
932 {
933 // Error message (0 if no error)
934 const char* error;
935
936 // Last parsed offset (in char_t units from string start)
937 ptrdiff_t offset;
938
939 // Default constructor, initializes object to failed state
940 xpath_parse_result();
941
942 // Cast to bool operator
943 operator bool() const;
944
945 // Get error description
946 const char* description() const;
947 };
948
949 // A single XPath variable
950 class PUGIXML_CLASS xpath_variable
951 {
952 friend class xpath_variable_set;
953
954 protected:
955 xpath_value_type _type;
956 xpath_variable* _next;
957
958 xpath_variable();
959
960 // Non-copyable semantics
961 xpath_variable(const xpath_variable&);
962 xpath_variable& operator=(const xpath_variable&);
963
964 public:
965 // Get variable name
966 const char_t* name() const;
967
968 // Get variable type
969 xpath_value_type type() const;
970
971 // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
972 bool get_boolean() const;
973 double get_number() const;
974 const char_t* get_string() const;
975 const xpath_node_set& get_node_set() const;
976
977 // Set variable value; no type conversion is performed, false is returned on type mismatch error
978 bool set(bool value);
979 bool set(double value);
980 bool set(const char_t* value);
981 bool set(const xpath_node_set& value);
982 };
983
984 // A set of XPath variables
985 class PUGIXML_CLASS xpath_variable_set
986 {
987 private:
988 xpath_variable* _data[64];
989
990 // Non-copyable semantics
991 xpath_variable_set(const xpath_variable_set&);
992 xpath_variable_set& operator=(const xpath_variable_set&);
993
994 xpath_variable* find(const char_t* name) const;
995
996 public:
997 // Default constructor/destructor
998 xpath_variable_set();
999 ~xpath_variable_set();
1000
1001 // Add a new variable or get the existing one, if the types match
1002 xpath_variable* add(const char_t* name, xpath_value_type type);
1003
1004 // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
1005 bool set(const char_t* name, bool value);
1006 bool set(const char_t* name, double value);
1007 bool set(const char_t* name, const char_t* value);
1008 bool set(const char_t* name, const xpath_node_set& value);
1009
1010 // Get existing variable by name
1011 xpath_variable* get(const char_t* name);
1012 const xpath_variable* get(const char_t* name) const;
1013 };
1014
1015 // A compiled XPath query object
1016 class PUGIXML_CLASS xpath_query
1017 {
1018 private:
1019 void* _impl;
1020 xpath_parse_result _result;
1021
1022 typedef void (*unspecified_bool_type)(xpath_query***);
1023
1024 // Non-copyable semantics
1025 xpath_query(const xpath_query&);
1026 xpath_query& operator=(const xpath_query&);
1027
1028 public:
1029 // Construct a compiled object from XPath expression.
1030 // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
1031 explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
1032
1033 // Destructor
1034 ~xpath_query();
1035
1036 // Get query expression return type
1037 xpath_value_type return_type() const;
1038
1039 // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
1040 // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
1041 bool evaluate_boolean(const xpath_node& n) const;
1042
1043 // Evaluate expression as double value in the specified context; performs type conversion if necessary.
1044 // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
1045 double evaluate_number(const xpath_node& n) const;
1046
1047 #ifndef PUGIXML_NO_STL
1048 // Evaluate expression as string value in the specified context; performs type conversion if necessary.
1049 // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
1050 string_t evaluate_string(const xpath_node& n) const;
1051 #endif
1052
1053 // Evaluate expression as string value in the specified context; performs type conversion if necessary.
1054 // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
1055 // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
1056 // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead.
1057 size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
1058
1059 // Evaluate expression as node set in the specified context.
1060 // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
1061 // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
1062 xpath_node_set evaluate_node_set(const xpath_node& n) const;
1063
1064 // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
1065 const xpath_parse_result& result() const;
1066
1067 // Safe bool conversion operator
1068 operator unspecified_bool_type() const;
1069
1070 // Borland C++ workaround
1071 bool operator!() const;
1072 };
1073
1074 #ifndef PUGIXML_NO_EXCEPTIONS
1075 // XPath exception class
1076 class PUGIXML_CLASS xpath_exception: public std::exception
1077 {
1078 private:
1079 xpath_parse_result _result;
1080
1081 public:
1082 // Construct exception from parse result
1083 explicit xpath_exception(const xpath_parse_result& result);
1084
1085 // Get error message
1086 virtual const char* what() const throw();
1087
1088 // Get parse result
1089 const xpath_parse_result& result() const;
1090 };
1091 #endif
1092
1093 // XPath node class (either xml_node or xml_attribute)
1094 class PUGIXML_CLASS xpath_node
1095 {
1096 private:
1097 xml_node _node;
1098 xml_attribute _attribute;
1099
1100 typedef void (*unspecified_bool_type)(xpath_node***);
1101
1102 public:
1103 // Default constructor; constructs empty XPath node
1104 xpath_node();
1105
1106 // Construct XPath node from XML node/attribute
1107 xpath_node(const xml_node& node);
1108 xpath_node(const xml_attribute& attribute, const xml_node& parent);
1109
1110 // Get node/attribute, if any
1111 xml_node node() const;
1112 xml_attribute attribute() const;
1113
1114 // Get parent of contained node/attribute
1115 xml_node parent() const;
1116
1117 // Safe bool conversion operator
1118 operator unspecified_bool_type() const;
1119
1120 // Borland C++ workaround
1121 bool operator!() const;
1122
1123 // Comparison operators
1124 bool operator==(const xpath_node& n) const;
1125 bool operator!=(const xpath_node& n) const;
1126 };
1127
#ifdef __BORLANDC__
// Workaround for Borland C++: logical operators taking an xpath_node on the left-hand side
bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
#endif
1133
1134 // A fixed-size collection of XPath nodes
1135 class PUGIXML_CLASS xpath_node_set
1136 {
1137 public:
1138 // Collection type
1139 enum type_t
1140 {
1141 type_unsorted, // Not ordered
1142 type_sorted, // Sorted by document order (ascending)
1143 type_sorted_reverse // Sorted by document order (descending)
1144 };
1145
1146 // Constant iterator type
1147 typedef const xpath_node* const_iterator;
1148
1149 // Default constructor. Constructs empty set.
1150 xpath_node_set();
1151
1152 // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
1153 xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
1154
1155 // Destructor
1156 ~xpath_node_set();
1157
1158 // Copy constructor/assignment operator
1159 xpath_node_set(const xpath_node_set& ns);
1160 xpath_node_set& operator=(const xpath_node_set& ns);
1161
1162 // Get collection type
1163 type_t type() const;
1164
1165 // Get collection size
1166 size_t size() const;
1167
1168 // Indexing operator
1169 const xpath_node& operator[](size_t index) const;
1170
1171 // Collection iterators
1172 const_iterator begin() const;
1173 const_iterator end() const;
1174
1175 // Sort the collection in ascending/descending order by document order
1176 void sort(bool reverse = false);
1177
1178 // Get first node in the collection by document order
1179 xpath_node first() const;
1180
1181 // Check if collection is empty
1182 bool empty() const;
1183
1184 private:
1185 type_t _type;
1186
1187 xpath_node _storage;
1188
1189 xpath_node* _begin;
1190 xpath_node* _end;
1191
1192 void _assign(const_iterator begin, const_iterator end);
1193 };
1194#endif
1195
1196#ifndef PUGIXML_NO_STL
1197 // Convert wide string to UTF8
1198 std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
1199 std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
1200
1201 // Convert UTF8 to wide string
1202 std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
1203 std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
1204#endif
1205
1206 // Memory allocation function interface; returns pointer to allocated memory or NULL on failure
1207 typedef void* (*allocation_function)(size_t size);
1208
1209 // Memory deallocation function interface
1210 typedef void (*deallocation_function)(void* ptr);
1211
1212 // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
1213 void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
1214
1215 // Get current memory management functions
1216 allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
1217 deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
1218}
1219
#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
namespace std
{
	// Workarounds for the (non-standard) iterator category detection used by older compiler versions (MSVC7/IC8 and earlier)
	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
	std::forward_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
}
#endif
1229
#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
namespace std
{
	// Workarounds for the (non-standard) iterator category detection
	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
	std::forward_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
}
#endif
1239
1240#endif
1241
1242/**
1243 * Copyright (c) 2006-2012 Arseny Kapoulkine
1244 *
1245 * Permission is hereby granted, free of charge, to any person
1246 * obtaining a copy of this software and associated documentation
1247 * files (the "Software"), to deal in the Software without
1248 * restriction, including without limitation the rights to use,
1249 * copy, modify, merge, publish, distribute, sublicense, and/or sell
1250 * copies of the Software, and to permit persons to whom the
1251 * Software is furnished to do so, subject to the following
1252 * conditions:
1253 *
1254 * The above copyright notice and this permission notice shall be
1255 * included in all copies or substantial portions of the Software.
1256 *
1257 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1258 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
1259 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1260 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
1261 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
1262 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1263 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
1264 * OTHER DEALINGS IN THE SOFTWARE.
1265 */
Note: See TracBrowser for help on using the repository browser.