Changeset 1055 for XmlTools2/trunk/libs
- Timestamp:
- Oct 28, 2016, 11:05:06 PM (8 years ago)
- Location:
- XmlTools2/trunk/libs
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
XmlTools2/trunk/libs/pugiconfig.hpp
r906 r1055 1 1 /** 2 * pugixml parser - version 1.2 2 * pugixml parser - version 1.7 3 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 4 * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 5 * Report bugs and download new versions at http://pugixml.org/ 6 6 * … … 17 17 // Uncomment this to enable wchar_t mode 18 18 // #define PUGIXML_WCHAR_MODE 19 20 // Uncomment this to enable compact mode 21 // #define PUGIXML_COMPACT 19 22 20 23 // Uncomment this to disable XPath … … 33 36 // In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead 34 37 35 // Uncomment this to switch to header-only version 36 // #define PUGIXML_HEADER_ONLY 37 // #include "pugixml.cpp" 38 39 38 // Tune these constants to adjust memory-related behavior 40 39 // #define PUGIXML_MEMORY_PAGE_SIZE 32768 … … 42 41 // #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096 43 42 43 // Uncomment this to switch to header-only version 44 // #define PUGIXML_HEADER_ONLY 45 46 // Uncomment this to enable long long support 47 // #define PUGIXML_HAS_LONG_LONG 48 44 49 #endif 45 50 46 51 /** 47 * Copyright (c) 2006-2012 Arseny Kapoulkine 52 * Copyright (c) 2006-2015 Arseny Kapoulkine 48 53 * 49 54 * Permission is hereby granted, free of charge, to any person -
XmlTools2/trunk/libs/pugixml.cpp
r906 r1055 1 1 /** 2 * pugixml parser - version 1. 22 * pugixml parser - version 1.7 3 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-201 2, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)4 * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 5 * Report bugs and download new versions at http://pugixml.org/ 6 6 * … … 21 21 #include <string.h> 22 22 #include <assert.h> 23 #include <wchar.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 24 28 25 29 #ifndef PUGIXML_NO_XPATH … … 82 86 #endif 83 87 88 // Branch weight controls 89 #if defined(__GNUC__) 90 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 91 #else 92 # define PUGI__UNLIKELY(cond) (cond) 93 #endif 94 84 95 // Simple static assertion 85 96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } … … 96 107 using std::memcpy; 97 108 using std::memmove; 109 using std::memset; 98 110 #endif 99 111 … … 124 136 # include <stdint.h> 125 137 #else 138 namespace pugi 139 { 126 140 # ifndef _UINTPTR_T_DEFINED 127 // No native uintptr_t in MSVC6 and in some WinCE versions 128 typedef size_t uintptr_t; 129 #define _UINTPTR_T_DEFINED 141 typedef size_t uintptr_t; 130 142 # endif 131 PUGI__NS_BEGIN 143 132 144 typedef unsigned __int8 uint8_t; 133 145 typedef unsigned __int16 uint16_t; 134 146 typedef unsigned __int32 uint32_t; 135 PUGI__NS_END 147 } 136 148 #endif 137 149 … … 155 167 }; 156 168 169 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them 170 // Without a template<> we'll get multiple definitions of the same static 157 171 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 158 172 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; … … 196 210 
return lhs[count] == 0; 197 211 } 198 199 #ifdef PUGIXML_WCHAR_MODE 200 // Convert string to wide string, assuming all symbols are ASCII 201 PUGI__FN void widen_ascii(wchar_t* dest, const char* source) 202 { 203 for (const char* i = source; *i; ++i) *dest++ = *i; 204 *dest = 0; 205 } 206 #endif 212 213 // Get length of wide string, even if CRT lacks wide character support 214 PUGI__FN size_t strlength_wide(const wchar_t* s) 215 { 216 assert(s); 217 218 #ifdef PUGIXML_WCHAR_MODE 219 return wcslen(s); 220 #else 221 const wchar_t* end = s; 222 while (*end) end++; 223 return static_cast<size_t>(end - s); 224 #endif 225 } 207 226 PUGI__NS_END 208 227 209 #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH) 210 // auto_ptr-like buffer holder for exception recovery 228 // auto_ptr-like object for exception recovery 211 229 PUGI__NS_BEGIN 212 struct buffer_holder213 { 214 void* data;215 void (*deleter)(void*);216 217 buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)218 { 219 } 220 221 ~ buffer_holder()230 template <typename T, typename D = void(*)(T*)> struct auto_deleter 231 { 232 T* data; 233 D deleter; 234 235 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) 236 { 237 } 238 239 ~auto_deleter() 222 240 { 223 241 if (data) deleter(data); 224 242 } 225 243 226 void* release()227 { 228 void* result = data;244 T* release() 245 { 246 T* result = data; 229 247 data = 0; 230 248 return result; 231 249 } 232 250 }; 251 PUGI__NS_END 252 253 #ifdef PUGIXML_COMPACT 254 PUGI__NS_BEGIN 255 class compact_hash_table 256 { 257 public: 258 compact_hash_table(): _items(0), _capacity(0), _count(0) 259 { 260 } 261 262 void clear() 263 { 264 if (_items) 265 { 266 xml_memory::deallocate(_items); 267 _items = 0; 268 _capacity = 0; 269 _count = 0; 270 } 271 } 272 273 void** find(const void* key) 274 { 275 assert(key); 276 277 if (_capacity == 0) return 0; 278 279 size_t hashmod = _capacity - 1; 280 size_t bucket = hash(key) & hashmod; 
281 282 for (size_t probe = 0; probe <= hashmod; ++probe) 283 { 284 item_t& probe_item = _items[bucket]; 285 286 if (probe_item.key == key) 287 return &probe_item.value; 288 289 if (probe_item.key == 0) 290 return 0; 291 292 // hash collision, quadratic probing 293 bucket = (bucket + probe + 1) & hashmod; 294 } 295 296 assert(!"Hash table is full"); 297 return 0; 298 } 299 300 void** insert(const void* key) 301 { 302 assert(key); 303 assert(_capacity != 0 && _count < _capacity - _capacity / 4); 304 305 size_t hashmod = _capacity - 1; 306 size_t bucket = hash(key) & hashmod; 307 308 for (size_t probe = 0; probe <= hashmod; ++probe) 309 { 310 item_t& probe_item = _items[bucket]; 311 312 if (probe_item.key == 0) 313 { 314 probe_item.key = key; 315 _count++; 316 return &probe_item.value; 317 } 318 319 if (probe_item.key == key) 320 return &probe_item.value; 321 322 // hash collision, quadratic probing 323 bucket = (bucket + probe + 1) & hashmod; 324 } 325 326 assert(!"Hash table is full"); 327 return 0; 328 } 329 330 bool reserve() 331 { 332 if (_count + 16 >= _capacity - _capacity / 4) 333 return rehash(); 334 335 return true; 336 } 337 338 private: 339 struct item_t 340 { 341 const void* key; 342 void* value; 343 }; 344 345 item_t* _items; 346 size_t _capacity; 347 348 size_t _count; 349 350 bool rehash(); 351 352 static unsigned int hash(const void* key) 353 { 354 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 355 356 // MurmurHash3 32-bit finalizer 357 h ^= h >> 16; 358 h *= 0x85ebca6bu; 359 h ^= h >> 13; 360 h *= 0xc2b2ae35u; 361 h ^= h >> 16; 362 363 return h; 364 } 365 }; 366 367 PUGI__FN_NO_INLINE bool compact_hash_table::rehash() 368 { 369 compact_hash_table rt; 370 rt._capacity = (_capacity == 0) ? 
32 : _capacity * 2; 371 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); 372 373 if (!rt._items) 374 return false; 375 376 memset(rt._items, 0, sizeof(item_t) * rt._capacity); 377 378 for (size_t i = 0; i < _capacity; ++i) 379 if (_items[i].key) 380 *rt.insert(_items[i].key) = _items[i].value; 381 382 if (_items) 383 xml_memory::deallocate(_items); 384 385 _capacity = rt._capacity; 386 _items = rt._items; 387 388 assert(_count == rt._count); 389 390 return true; 391 } 392 233 393 PUGI__NS_END 234 394 #endif … … 243 403 ; 244 404 245 static const uintptr_t xml_memory_page_alignment = 32; 405 #ifdef PUGIXML_COMPACT 406 static const uintptr_t xml_memory_block_alignment = 4; 407 408 static const uintptr_t xml_memory_page_alignment = sizeof(void*); 409 #else 410 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 411 412 static const uintptr_t xml_memory_page_alignment = 64; 246 413 static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); 414 #endif 415 416 // extra metadata bits 417 static const uintptr_t xml_memory_page_contents_shared_mask = 32; 247 418 static const uintptr_t xml_memory_page_name_allocated_mask = 16; 248 419 static const uintptr_t xml_memory_page_value_allocated_mask = 8; 249 420 static const uintptr_t xml_memory_page_type_mask = 7; 250 421 422 // combined masks for string uniqueness 423 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 424 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; 425 426 #ifdef PUGIXML_COMPACT 427 #define PUGI__GETPAGE_IMPL(header) (header).get_page() 428 #else 429 #define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask) 430 #endif 431 432 #define PUGI__GETPAGE(n) 
PUGI__GETPAGE_IMPL((n)->header) 433 #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1) 434 251 435 struct xml_allocator; 252 436 … … 255 439 static xml_memory_page* construct(void* memory) 256 440 { 257 if (!memory) return 0; //$ redundant, left for performance258 259 441 xml_memory_page* result = static_cast<xml_memory_page*>(memory); 260 442 261 443 result->allocator = 0; 262 result->memory = 0;263 444 result->prev = 0; 264 445 result->next = 0; … … 266 447 result->freed_size = 0; 267 448 449 #ifdef PUGIXML_COMPACT 450 result->compact_string_base = 0; 451 result->compact_shared_parent = 0; 452 result->compact_page_marker = 0; 453 #endif 454 268 455 return result; 269 456 } 270 457 271 458 xml_allocator* allocator; 272 273 void* memory;274 459 275 460 xml_memory_page* prev; … … 279 464 size_t freed_size; 280 465 281 char data[1]; 466 #ifdef PUGIXML_COMPACT 467 char_t* compact_string_base; 468 void* compact_shared_parent; 469 uint32_t* compact_page_marker; 470 #endif 282 471 }; 283 472 … … 292 481 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 293 482 { 483 #ifdef PUGIXML_COMPACT 484 _hash = 0; 485 #endif 294 486 } 295 487 296 488 xml_memory_page* allocate_page(size_t data_size) 297 489 { 298 size_t size = offsetof(xml_memory_page, data) + data_size;490 size_t size = sizeof(xml_memory_page) + data_size; 299 491 300 492 // allocate block with some alignment, leaving memory for worst-case padding … … 302 494 if (!memory) return 0; 303 495 304 // align upwards to page boundary305 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));496 // align to next page boundary (note: this guarantees at least 1 usable byte before the page) 497 char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1)); 306 498 
307 499 // prepare page structure 308 500 xml_memory_page* page = xml_memory_page::construct(page_memory); 309 310 page->memory = memory; 501 assert(page); 502 311 503 page->allocator = _root->allocator; 312 504 505 // record the offset for freeing the memory block 506 assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127); 507 page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory)); 508 313 509 return page; 314 510 } … … 316 512 static void deallocate_page(xml_memory_page* page) 317 513 { 318 xml_memory::deallocate(page->memory); 514 char* page_memory = reinterpret_cast<char*>(page); 515 516 xml_memory::deallocate(page_memory - page_memory[-1]); 319 517 } 320 518 … … 323 521 void* allocate_memory(size_t size, xml_memory_page*& out_page) 324 522 { 325 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page); 326 327 void* buf = _root->data + _busy_size; 523 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 524 return allocate_memory_oob(size, out_page); 525 526 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 328 527 329 528 _busy_size += size; … … 334 533 } 335 534 535 #ifdef PUGIXML_COMPACT 536 void* allocate_object(size_t size, xml_memory_page*& out_page) 537 { 538 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 539 if (!result) return 0; 540 541 // adjust for marker 542 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 543 544 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 545 { 546 // insert new marker 547 uint32_t* marker = static_cast<uint32_t*>(result); 548 549 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); 550 out_page->compact_page_marker = marker; 551 552 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker 
block 553 // this will make sure deallocate_memory correctly tracks the size 554 out_page->freed_size += sizeof(uint32_t); 555 556 return marker + 1; 557 } 558 else 559 { 560 // roll back uint32_t part 561 _busy_size -= sizeof(uint32_t); 562 563 return result; 564 } 565 } 566 #else 567 void* allocate_object(size_t size, xml_memory_page*& out_page) 568 { 569 return allocate_memory(size, out_page); 570 } 571 #endif 572 336 573 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) 337 574 { 338 575 if (page == _root) page->busy_size = _busy_size; 339 576 340 assert(ptr >= page->data && ptr < page->data+ page->busy_size);577 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 341 578 (void)!ptr; 342 579 … … 351 588 352 589 // top page freed, just reset sizes 353 page->busy_size = page->freed_size = 0; 590 page->busy_size = 0; 591 page->freed_size = 0; 592 593 #ifdef PUGIXML_COMPACT 594 // reset compact state to maximize efficiency 595 page->compact_string_base = 0; 596 page->compact_shared_parent = 0; 597 page->compact_page_marker = 0; 598 #endif 599 354 600 _busy_size = 0; 355 601 } … … 371 617 char_t* allocate_string(size_t length) 372 618 { 619 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 620 621 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 622 373 623 // allocate memory for string and header block 374 624 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 375 625 376 // round size up to pointeralignment boundary377 size_t full_size = (size + ( sizeof(void*) - 1)) & ~(sizeof(void*)- 1);626 // round size up to block alignment boundary 627 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 378 628 379 629 xml_memory_page* page; … … 383 633 384 634 // setup header 385 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - 
page->data; 386 387 assert(page_offset >= 0 && page_offset < (1 << 16)); 388 header->page_offset = static_cast<uint16_t>(page_offset); 635 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 636 637 assert(page_offset % xml_memory_block_alignment == 0); 638 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); 639 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 389 640 390 641 // full_size == 0 for large strings that occupy the whole page 391 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0)); 392 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0); 642 assert(full_size % xml_memory_block_alignment == 0); 643 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 644 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); 393 645 394 646 // round-trip through void* to avoid 'cast increases required alignment of target type' warning … … 404 656 // get header 405 657 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 658 assert(header); 406 659 407 660 // deallocate 408 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;661 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 409 662 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 410 663 411 664 // if full_size == 0 then this string occupies the whole page 412 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size ;665 size_t full_size = header->full_size == 0 ? 
page->busy_size : header->full_size * xml_memory_block_alignment; 413 666 414 667 deallocate_memory(header, full_size, page); 668 } 669 670 bool reserve() 671 { 672 #ifdef PUGIXML_COMPACT 673 return _hash->reserve(); 674 #else 675 return true; 676 #endif 415 677 } 416 678 417 679 xml_memory_page* _root; 418 680 size_t _busy_size; 681 682 #ifdef PUGIXML_COMPACT 683 compact_hash_table* _hash; 684 #endif 419 685 }; 420 686 … … 450 716 _root->prev->next = page; 451 717 _root->prev = page; 452 } 453 454 // allocate inside page 455 page->busy_size = size; 456 457 return page->data; 718 719 page->busy_size = size; 720 } 721 722 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 458 723 } 459 724 PUGI__NS_END 460 725 726 #ifdef PUGIXML_COMPACT 727 PUGI__NS_BEGIN 728 static const uintptr_t compact_alignment_log2 = 2; 729 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 730 731 class compact_header 732 { 733 public: 734 compact_header(xml_memory_page* page, unsigned int flags) 735 { 736 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 737 738 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 739 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 740 741 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 742 _flags = static_cast<unsigned char>(flags); 743 } 744 745 void operator&=(uintptr_t mod) 746 { 747 _flags &= static_cast<unsigned char>(mod); 748 } 749 750 void operator|=(uintptr_t mod) 751 { 752 _flags |= static_cast<unsigned char>(mod); 753 } 754 755 uintptr_t operator&(uintptr_t mod) const 756 { 757 return _flags & mod; 758 } 759 760 xml_memory_page* get_page() const 761 { 762 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 763 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 764 const char* 
page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 765 766 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 767 } 768 769 private: 770 unsigned char _page; 771 unsigned char _flags; 772 }; 773 774 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 775 { 776 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 777 778 return header->get_page(); 779 } 780 781 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 782 { 783 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); 784 } 785 786 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) 787 { 788 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; 789 } 790 791 template <typename T, int header_offset, int start = -126> class compact_pointer 792 { 793 public: 794 compact_pointer(): _data(0) 795 { 796 } 797 798 void operator=(const compact_pointer& rhs) 799 { 800 *this = rhs + 0; 801 } 802 803 void operator=(T* value) 804 { 805 if (value) 806 { 807 // value is guaranteed to be compact-aligned; 'this' is not 808 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 809 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 810 // compensate for arithmetic shift rounding for negative values 811 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 812 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 813 814 if (static_cast<uintptr_t>(offset) <= 253) 815 _data = static_cast<unsigned char>(offset + 1); 816 else 817 { 818 compact_set_value<header_offset>(this, value); 819 820 _data = 255; 821 } 822 } 823 else 824 _data = 0; 825 } 826 827 operator T*() const 828 { 829 if (_data) 830 { 831 if (_data < 255) 832 { 833 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 834 835 return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2)); 836 } 837 else 838 return compact_get_value<header_offset, T>(this); 839 } 840 else 841 return 0; 842 } 843 844 T* operator->() const 845 { 846 return *this; 847 } 848 849 private: 850 unsigned char _data; 851 }; 852 853 template <typename T, int header_offset> class compact_pointer_parent 854 { 855 public: 856 compact_pointer_parent(): _data(0) 857 { 858 } 859 860 void operator=(const compact_pointer_parent& rhs) 861 { 862 *this = rhs + 0; 863 } 864 865 void operator=(T* value) 866 { 867 if (value) 868 { 869 // value is guaranteed to be compact-aligned; 'this' is not 870 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 871 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 872 // compensate for arithmetic shift behavior for negative values 873 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 874 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 875 876 if (static_cast<uintptr_t>(offset) <= 65533) 877 { 878 _data = static_cast<unsigned short>(offset + 1); 879 } 880 else 881 { 882 xml_memory_page* page = compact_get_page(this, header_offset); 883 884 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 885 page->compact_shared_parent = value; 886 887 if (page->compact_shared_parent == value) 888 { 889 _data = 65534; 890 } 891 else 892 { 893 compact_set_value<header_offset>(this, value); 894 895 _data = 65535; 896 } 897 } 898 } 899 else 900 { 901 _data = 0; 902 } 903 } 904 905 operator T*() const 906 { 907 if (_data) 908 { 909 if (_data < 65534) 910 { 911 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 912 913 return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2)); 914 } 915 else if (_data == 65534) 916 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 917 else 918 return compact_get_value<header_offset, T>(this); 919 } 920 else 921 return 0; 922 } 923 924 T* operator->() const 925 { 926 return *this; 927 } 928 929 private: 930 uint16_t _data; 931 }; 932 933 template <int header_offset, int base_offset> class compact_string 934 { 935 public: 936 compact_string(): _data(0) 937 { 938 } 939 940 void operator=(const compact_string& rhs) 941 { 942 *this = rhs + 0; 943 } 944 945 void operator=(char_t* value) 946 { 947 if (value) 948 { 949 xml_memory_page* page = compact_get_page(this, header_offset); 950 951 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 952 page->compact_string_base = value; 953 954 ptrdiff_t offset = value - page->compact_string_base; 955 956 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 
957 { 958 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 959 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 960 961 if (*base == 0) 962 { 963 *base = static_cast<uint16_t>((offset >> 7) + 1); 964 _data = static_cast<unsigned char>((offset & 127) + 1); 965 } 966 else 967 { 968 ptrdiff_t remainder = offset - ((*base - 1) << 7); 969 970 if (static_cast<uintptr_t>(remainder) <= 253) 971 { 972 _data = static_cast<unsigned char>(remainder + 1); 973 } 974 else 975 { 976 compact_set_value<header_offset>(this, value); 977 978 _data = 255; 979 } 980 } 981 } 982 else 983 { 984 compact_set_value<header_offset>(this, value); 985 986 _data = 255; 987 } 988 } 989 else 990 { 991 _data = 0; 992 } 993 } 994 995 operator char_t*() const 996 { 997 if (_data) 998 { 999 if (_data < 255) 1000 { 1001 xml_memory_page* page = compact_get_page(this, header_offset); 1002 1003 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1004 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1005 assert(*base); 1006 1007 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1008 1009 return page->compact_string_base + offset; 1010 } 1011 else 1012 { 1013 return compact_get_value<header_offset, char_t>(this); 1014 } 1015 } 1016 else 1017 return 0; 1018 } 1019 1020 private: 1021 unsigned char _data; 1022 }; 1023 PUGI__NS_END 1024 #endif 1025 1026 #ifdef PUGIXML_COMPACT 461 1027 namespace pugi 462 1028 { 463 /// A 'name=value' XML attribute structure.464 1029 struct xml_attribute_struct 465 1030 { 466 /// Default ctor 467 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) 468 { 469 } 470 471 uintptr_t header; 472 473 char_t* name; ///< Pointer to attribute name. 
474 char_t* value; ///< Pointer to attribute value. 475 476 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list) 477 xml_attribute_struct* next_attribute; ///< Next attribute 1031 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1032 { 1033 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1034 } 1035 1036 impl::compact_header header; 1037 1038 uint16_t namevalue_base; 1039 1040 impl::compact_string<4, 2> name; 1041 impl::compact_string<5, 3> value; 1042 1043 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1044 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 478 1045 }; 479 1046 480 /// An XML document tree node.481 1047 struct xml_node_struct 482 1048 { 483 /// Default ctor 484 /// \param type - node type 485 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 486 { 487 } 488 489 uintptr_t header; 490 491 xml_node_struct* parent; ///< Pointer to parent 492 493 char_t* name; ///< Pointer to element name. 494 char_t* value; ///< Pointer to any associated string data. 
495 496 xml_node_struct* first_child; ///< First child 497 498 xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list) 499 xml_node_struct* next_sibling; ///< Right brother 500 501 xml_attribute_struct* first_attribute; ///< First attribute 1049 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) 1050 { 1051 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1052 } 1053 1054 impl::compact_header header; 1055 1056 uint16_t namevalue_base; 1057 1058 impl::compact_string<4, 2> name; 1059 impl::compact_string<5, 3> value; 1060 1061 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1062 1063 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1064 1065 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1066 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1067 1068 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 502 1069 }; 503 1070 } 1071 #else 1072 namespace pugi 1073 { 1074 struct xml_attribute_struct 1075 { 1076 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) 1077 { 1078 } 1079 1080 uintptr_t header; 1081 1082 char_t* name; 1083 char_t* value; 1084 1085 xml_attribute_struct* prev_attribute_c; 1086 xml_attribute_struct* next_attribute; 1087 }; 1088 1089 struct xml_node_struct 1090 { 1091 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1092 { 1093 } 1094 1095 uintptr_t header; 1096 1097 char_t* name; 1098 char_t* value; 1099 1100 xml_node_struct* parent; 1101 1102 xml_node_struct* first_child; 1103 1104 xml_node_struct* prev_sibling_c; 1105 xml_node_struct* next_sibling; 1106 1107 xml_attribute_struct* first_attribute; 1108 }; 1109 } 1110 #endif 504 1111 505 1112 PUGI__NS_BEGIN 
1113 struct xml_extra_buffer 1114 { 1115 char_t* buffer; 1116 xml_extra_buffer* next; 1117 }; 1118 506 1119 struct xml_document_struct: public xml_node_struct, public xml_allocator 507 1120 { 508 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0) 509 { 1121 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1122 { 1123 #ifdef PUGIXML_COMPACT 1124 _hash = &hash; 1125 #endif 510 1126 } 511 1127 512 1128 const char_t* buffer; 1129 1130 xml_extra_buffer* extra_buffers; 1131 1132 #ifdef PUGIXML_COMPACT 1133 compact_hash_table hash; 1134 #endif 513 1135 }; 514 1136 515 inline xml_allocator& get_allocator(const xml_node_struct* node) 516 { 517 assert(node); 518 519 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator; 1137 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1138 { 1139 assert(object); 1140 1141 return *PUGI__GETPAGE(object)->allocator; 1142 } 1143 1144 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1145 { 1146 assert(object); 1147 1148 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 520 1149 } 521 1150 PUGI__NS_END … … 526 1155 { 527 1156 xml_memory_page* page; 528 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page); 1157 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1158 if (!memory) return 0; 529 1159 530 1160 return new (memory) xml_attribute_struct(page); … … 534 1164 { 535 1165 xml_memory_page* page; 536 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page); 1166 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1167 if (!memory) return 0; 537 1168 538 1169 return new (memory) xml_node_struct(page, type); … … 541 1172 inline void destroy_attribute(xml_attribute_struct* a, 
xml_allocator& alloc) 542 1173 { 543 uintptr_t header = a->header; 544 545 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name); 546 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value); 547 548 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); 1174 if (a->header & impl::xml_memory_page_name_allocated_mask) 1175 alloc.deallocate_string(a->name); 1176 1177 if (a->header & impl::xml_memory_page_value_allocated_mask) 1178 alloc.deallocate_string(a->value); 1179 1180 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 549 1181 } 550 1182 551 1183 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 552 1184 { 553 uintptr_t header = n->header; 554 555 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name); 556 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value); 1185 if (n->header & impl::xml_memory_page_name_allocated_mask) 1186 alloc.deallocate_string(n->name); 1187 1188 if (n->header & impl::xml_memory_page_value_allocated_mask) 1189 alloc.deallocate_string(n->value); 557 1190 558 1191 for (xml_attribute_struct* attr = n->first_attribute; attr; ) … … 574 1207 } 575 1208 576 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)); 577 } 578 579 PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 580 { 1209 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1210 } 1211 1212 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1213 { 1214 child->parent = node; 1215 1216 xml_node_struct* head = node->first_child; 1217 1218 if (head) 1219 { 1220 xml_node_struct* tail = head->prev_sibling_c; 1221 1222 tail->next_sibling = child; 
1223 child->prev_sibling_c = tail; 1224 head->prev_sibling_c = child; 1225 } 1226 else 1227 { 1228 node->first_child = child; 1229 child->prev_sibling_c = child; 1230 } 1231 } 1232 1233 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1234 { 1235 child->parent = node; 1236 1237 xml_node_struct* head = node->first_child; 1238 1239 if (head) 1240 { 1241 child->prev_sibling_c = head->prev_sibling_c; 1242 head->prev_sibling_c = child; 1243 } 1244 else 1245 child->prev_sibling_c = child; 1246 1247 child->next_sibling = head; 1248 node->first_child = child; 1249 } 1250 1251 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1252 { 1253 xml_node_struct* parent = node->parent; 1254 1255 child->parent = parent; 1256 1257 if (node->next_sibling) 1258 node->next_sibling->prev_sibling_c = child; 1259 else 1260 parent->first_child->prev_sibling_c = child; 1261 1262 child->next_sibling = node->next_sibling; 1263 child->prev_sibling_c = node; 1264 1265 node->next_sibling = child; 1266 } 1267 1268 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1269 { 1270 xml_node_struct* parent = node->parent; 1271 1272 child->parent = parent; 1273 1274 if (node->prev_sibling_c->next_sibling) 1275 node->prev_sibling_c->next_sibling = child; 1276 else 1277 parent->first_child = child; 1278 1279 child->prev_sibling_c = node->prev_sibling_c; 1280 child->next_sibling = node; 1281 1282 node->prev_sibling_c = child; 1283 } 1284 1285 inline void remove_node(xml_node_struct* node) 1286 { 1287 xml_node_struct* parent = node->parent; 1288 1289 if (node->next_sibling) 1290 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1291 else 1292 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1293 1294 if (node->prev_sibling_c->next_sibling) 1295 node->prev_sibling_c->next_sibling = node->next_sibling; 1296 else 1297 parent->first_child = node->next_sibling; 1298 1299 node->parent = 0; 1300 node->prev_sibling_c = 0; 
1301 node->next_sibling = 0; 1302 } 1303 1304 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1305 { 1306 xml_attribute_struct* head = node->first_attribute; 1307 1308 if (head) 1309 { 1310 xml_attribute_struct* tail = head->prev_attribute_c; 1311 1312 tail->next_attribute = attr; 1313 attr->prev_attribute_c = tail; 1314 head->prev_attribute_c = attr; 1315 } 1316 else 1317 { 1318 node->first_attribute = attr; 1319 attr->prev_attribute_c = attr; 1320 } 1321 } 1322 1323 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1324 { 1325 xml_attribute_struct* head = node->first_attribute; 1326 1327 if (head) 1328 { 1329 attr->prev_attribute_c = head->prev_attribute_c; 1330 head->prev_attribute_c = attr; 1331 } 1332 else 1333 attr->prev_attribute_c = attr; 1334 1335 attr->next_attribute = head; 1336 node->first_attribute = attr; 1337 } 1338 1339 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1340 { 1341 if (place->next_attribute) 1342 place->next_attribute->prev_attribute_c = attr; 1343 else 1344 node->first_attribute->prev_attribute_c = attr; 1345 1346 attr->next_attribute = place->next_attribute; 1347 attr->prev_attribute_c = place; 1348 place->next_attribute = attr; 1349 } 1350 1351 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1352 { 1353 if (place->prev_attribute_c->next_attribute) 1354 place->prev_attribute_c->next_attribute = attr; 1355 else 1356 node->first_attribute = attr; 1357 1358 attr->prev_attribute_c = place->prev_attribute_c; 1359 attr->next_attribute = place; 1360 place->prev_attribute_c = attr; 1361 } 1362 1363 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1364 { 1365 if (attr->next_attribute) 1366 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1367 else 1368 node->first_attribute->prev_attribute_c = 
attr->prev_attribute_c; 1369 1370 if (attr->prev_attribute_c->next_attribute) 1371 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1372 else 1373 node->first_attribute = attr->next_attribute; 1374 1375 attr->prev_attribute_c = 0; 1376 attr->next_attribute = 0; 1377 } 1378 1379 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1380 { 1381 if (!alloc.reserve()) return 0; 1382 581 1383 xml_node_struct* child = allocate_node(alloc, type); 582 1384 if (!child) return 0; 583 1385 584 child->parent = node; 585 586 xml_node_struct* first_child = node->first_child; 587 588 if (first_child) 589 { 590 xml_node_struct* last_child = first_child->prev_sibling_c; 591 592 last_child->next_sibling = child; 593 child->prev_sibling_c = last_child; 594 first_child->prev_sibling_c = child; 595 } 596 else 597 { 598 node->first_child = child; 599 child->prev_sibling_c = child; 600 } 601 1386 append_node(child, node); 1387 602 1388 return child; 603 1389 } 604 1390 605 PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc) 606 { 607 xml_attribute_struct* a = allocate_attribute(alloc); 608 if (!a) return 0; 609 610 xml_attribute_struct* first_attribute = node->first_attribute; 611 612 if (first_attribute) 613 { 614 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c; 615 616 last_attribute->next_attribute = a; 617 a->prev_attribute_c = last_attribute; 618 first_attribute->prev_attribute_c = a; 619 } 620 else 621 { 622 node->first_attribute = a; 623 a->prev_attribute_c = a; 624 } 625 626 return a; 1391 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1392 { 1393 if (!alloc.reserve()) return 0; 1394 1395 xml_attribute_struct* attr = allocate_attribute(alloc); 1396 if (!attr) return 0; 1397 1398 append_attribute(attr, node); 1399 1400 return attr; 627 1401 } 628 1402 
PUGI__NS_END … … 824 1598 }; 825 1599 826 template <size_t size> struct wchar_selector; 827 828 template <> struct wchar_selector<2> 829 { 830 typedef uint16_t type; 831 typedef utf16_counter counter; 832 typedef utf16_writer writer; 833 }; 834 835 template <> struct wchar_selector<4> 836 { 837 typedef uint32_t type; 838 typedef utf32_counter counter; 839 typedef utf32_writer writer; 840 }; 841 842 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 843 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 844 845 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder 846 { 847 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) 1600 struct utf8_decoder 1601 { 1602 typedef uint8_t type; 1603 1604 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 848 1605 { 849 1606 const uint8_t utf8_byte_mask = 0x3f; … … 906 1663 return result; 907 1664 } 908 909 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) 910 { 911 const uint16_t* end = data + size; 912 913 while (data < end) 1665 }; 1666 1667 template <typename opt_swap> struct utf16_decoder 1668 { 1669 typedef uint16_t type; 1670 1671 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1672 { 1673 while (size) 914 1674 { 915 1675 uint16_t lead = opt_swap::value ? 
endian_swap(*data) : *data; … … 920 1680 result = Traits::low(result, lead); 921 1681 data += 1; 1682 size -= 1; 922 1683 } 923 1684 // U+E000..U+FFFF … … 926 1687 result = Traits::low(result, lead); 927 1688 data += 1; 1689 size -= 1; 928 1690 } 929 1691 // surrogate pair lead 930 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)1692 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 931 1693 { 932 1694 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; … … 936 1698 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 937 1699 data += 2; 1700 size -= 2; 938 1701 } 939 1702 else 940 1703 { 941 1704 data += 1; 1705 size -= 1; 942 1706 } 943 1707 } … … 945 1709 { 946 1710 data += 1; 1711 size -= 1; 947 1712 } 948 1713 } … … 950 1715 return result; 951 1716 } 952 953 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) 954 { 955 const uint32_t* end = data + size; 956 957 while (data < end) 1717 }; 1718 1719 template <typename opt_swap> struct utf32_decoder 1720 { 1721 typedef uint32_t type; 1722 1723 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1724 { 1725 while (size) 958 1726 { 959 1727 uint32_t lead = opt_swap::value ? 
endian_swap(*data) : *data; … … 964 1732 result = Traits::low(result, lead); 965 1733 data += 1; 1734 size -= 1; 966 1735 } 967 1736 // U+10000..U+10FFFF … … 970 1739 result = Traits::high(result, lead); 971 1740 data += 1; 1741 size -= 1; 972 1742 } 973 1743 } … … 975 1745 return result; 976 1746 } 977 978 static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) 979 { 980 for (size_t i = 0; i < size; ++i) 981 { 982 result = Traits::low(result, data[i]); 1747 }; 1748 1749 struct latin1_decoder 1750 { 1751 typedef uint8_t type; 1752 1753 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1754 { 1755 while (size) 1756 { 1757 result = Traits::low(result, *data); 1758 data += 1; 1759 size -= 1; 983 1760 } 984 1761 985 1762 return result; 986 1763 } 987 988 static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)989 {990 return decode_utf16_block(data, size, result);991 }992 993 static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)994 {995 return decode_utf32_block(data, size, result);996 }997 998 static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)999 {1000 return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);1001 }1002 1764 }; 1003 1765 1004 template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length) 1005 { 1006 for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]); 1007 } 1766 template <size_t size> struct wchar_selector; 1767 1768 template <> struct wchar_selector<2> 1769 { 1770 typedef uint16_t type; 1771 typedef utf16_counter counter; 
1772 typedef utf16_writer writer; 1773 typedef utf16_decoder<opt_false> decoder; 1774 }; 1775 1776 template <> struct wchar_selector<4> 1777 { 1778 typedef uint32_t type; 1779 typedef utf32_counter counter; 1780 typedef utf32_writer writer; 1781 typedef utf32_decoder<opt_false> decoder; 1782 }; 1783 1784 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1785 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1786 1787 struct wchar_decoder 1788 { 1789 typedef wchar_t type; 1790 1791 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1792 { 1793 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1794 1795 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1796 } 1797 }; 1008 1798 1009 1799 #ifdef PUGIXML_WCHAR_MODE 1010 1800 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1011 1801 { 1012 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1802 for (size_t i = 0; i < length; ++i) 1803 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1013 1804 } 1014 1805 #endif … … 1157 1948 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 1158 1949 { 1950 size_t length = size / sizeof(char_t); 1951 1159 1952 if (is_mutable) 1160 1953 { 1161 1954 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 1955 out_length = length; 1162 1956 } 1163 1957 else 1164 1958 { 1165 void* buffer = xml_memory::allocate(size > 0 ? 
size : 1);1959 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 1166 1960 if (!buffer) return false; 1167 1961 1168 memcpy(buffer, contents, size); 1169 1170 out_buffer = static_cast<char_t*>(buffer); 1171 } 1172 1173 out_length = size / sizeof(char_t); 1962 if (contents) 1963 memcpy(buffer, contents, length * sizeof(char_t)); 1964 else 1965 assert(length == 0); 1966 1967 buffer[length] = 0; 1968 1969 out_buffer = buffer; 1970 out_length = length + 1; 1971 } 1174 1972 1175 1973 return true; … … 1186 1984 { 1187 1985 const char_t* data = static_cast<const char_t*>(contents); 1188 1986 size_t length = size / sizeof(char_t); 1987 1189 1988 if (is_mutable) 1190 1989 { 1191 out_buffer = const_cast<char_t*>(data); 1990 char_t* buffer = const_cast<char_t*>(data); 1991 1992 convert_wchar_endian_swap(buffer, data, length); 1993 1994 out_buffer = buffer; 1995 out_length = length; 1192 1996 } 1193 1997 else 1194 1998 { 1195 out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? 
size : 1)); 1196 if (!out_buffer) return false; 1197 } 1198 1199 out_length = size / sizeof(char_t); 1200 1201 convert_wchar_endian_swap(out_buffer, data, out_length); 1999 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2000 if (!buffer) return false; 2001 2002 convert_wchar_endian_swap(buffer, data, length); 2003 buffer[length] = 0; 2004 2005 out_buffer = buffer; 2006 out_length = length + 1; 2007 } 1202 2008 1203 2009 return true; 1204 2010 } 1205 2011 1206 PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) 1207 { 1208 const uint8_t* data = static_cast<const uint8_t*>(contents); 2012 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2013 { 2014 const typename D::type* data = static_cast<const typename D::type*>(contents); 2015 size_t data_length = size / sizeof(typename D::type); 1209 2016 1210 2017 // first pass: get length in wchar_t units 1211 out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);2018 size_t length = D::process(data, data_length, 0, wchar_counter()); 1212 2019 1213 2020 // allocate buffer of suitable length 1214 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); 1215 if (!out_buffer) return false; 1216 1217 // second pass: convert utf8 input to wchar_t 1218 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); 1219 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin); 1220 1221 assert(out_end == out_begin + out_length); 1222 (void)!out_end; 1223 1224 return true; 1225 } 1226 1227 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) 1228 { 1229 const uint16_t* data = static_cast<const uint16_t*>(contents); 1230 size_t length = size / sizeof(uint16_t); 1231 1232 // first pass: get length in wchar_t units 1233 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0); 1234 1235 // allocate buffer of suitable length 1236 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); 1237 if (!out_buffer) return false; 2021 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2022 if (!buffer) return false; 1238 2023 1239 2024 // second pass: convert utf16 input to wchar_t 1240 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); 1241 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin); 1242 1243 assert(out_end == out_begin + out_length); 1244 (void)!out_end; 1245 1246 return true; 1247 } 1248 1249 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) 1250 { 1251 const uint32_t* data = static_cast<const uint32_t*>(contents); 1252 size_t length = size / sizeof(uint32_t); 1253 1254 // first pass: get length in wchar_t units 1255 out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, 
length, 0); 1256 1257 // allocate buffer of suitable length 1258 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); 1259 if (!out_buffer) return false; 1260 1261 // second pass: convert utf32 input to wchar_t 1262 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); 1263 wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin); 1264 1265 assert(out_end == out_begin + out_length); 1266 (void)!out_end; 1267 1268 return true; 1269 } 1270 1271 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size) 1272 { 1273 const uint8_t* data = static_cast<const uint8_t*>(contents); 1274 1275 // get length in wchar_t units 1276 out_length = size; 1277 1278 // allocate buffer of suitable length 1279 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t))); 1280 if (!out_buffer) return false; 1281 1282 // convert latin1 input to wchar_t 1283 wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer); 1284 wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin); 1285 1286 assert(out_end == out_begin + out_length); 1287 (void)!out_end; 2025 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2026 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2027 2028 assert(oend == obegin + length); 2029 *oend = 0; 2030 2031 out_buffer = buffer; 2032 out_length = length + 1; 1288 2033 1289 2034 return true; … … 1296 2041 1297 2042 // fast path: no conversion required 1298 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2043 if (encoding == wchar_encoding) 2044 return get_mutable_buffer(out_buffer, out_length, contents, size, 
is_mutable); 1299 2045 1300 2046 // only endian-swapping is required 1301 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); 2047 if (need_endian_swap_utf(encoding, wchar_encoding)) 2048 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); 1302 2049 1303 2050 // source encoding is utf8 1304 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size); 2051 if (encoding == encoding_utf8) 2052 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); 1305 2053 1306 2054 // source encoding is utf16 … … 1310 2058 1311 2059 return (native_encoding == encoding) ? 1312 convert_buffer_ utf16(out_buffer, out_length, contents, size, opt_false()) :1313 convert_buffer_ utf16(out_buffer, out_length, contents, size, opt_true());2060 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2061 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 1314 2062 } 1315 2063 … … 1320 2068 1321 2069 return (native_encoding == encoding) ? 
1322 convert_buffer_ utf32(out_buffer, out_length, contents, size, opt_false()) :1323 convert_buffer_ utf32(out_buffer, out_length, contents, size, opt_true());2070 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2071 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 1324 2072 } 1325 2073 1326 2074 // source encoding is latin1 1327 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size); 2075 if (encoding == encoding_latin1) 2076 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 1328 2077 1329 2078 assert(!"Invalid encoding"); … … 1331 2080 } 1332 2081 #else 1333 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)1334 { 1335 const uint16_t* data = static_cast<const uint16_t*>(contents);1336 size_t length = size / sizeof(uint16_t);2082 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2083 { 2084 const typename D::type* data = static_cast<const typename D::type*>(contents); 2085 size_t data_length = size / sizeof(typename D::type); 1337 2086 1338 2087 // first pass: get length in utf8 units 1339 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);2088 size_t length = D::process(data, data_length, 0, utf8_counter()); 1340 2089 1341 2090 // allocate buffer of suitable length 1342 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length :1) * sizeof(char_t)));1343 if (! 
out_buffer) return false;2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2092 if (!buffer) return false; 1344 2093 1345 2094 // second pass: convert utf16 input to utf8 1346 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer); 1347 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin); 1348 1349 assert(out_end == out_begin + out_length); 1350 (void)!out_end; 1351 1352 return true; 1353 } 1354 1355 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap) 1356 { 1357 const uint32_t* data = static_cast<const uint32_t*>(contents); 1358 size_t length = size / sizeof(uint32_t); 1359 1360 // first pass: get length in utf8 units 1361 out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0); 1362 1363 // allocate buffer of suitable length 1364 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? 
out_length : 1) * sizeof(char_t))); 1365 if (!out_buffer) return false; 1366 1367 // second pass: convert utf32 input to utf8 1368 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer); 1369 uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin); 1370 1371 assert(out_end == out_begin + out_length); 1372 (void)!out_end; 2095 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2096 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2097 2098 assert(oend == obegin + length); 2099 *oend = 0; 2100 2101 out_buffer = buffer; 2102 out_length = length + 1; 1373 2103 1374 2104 return true; … … 1387 2117 { 1388 2118 const uint8_t* data = static_cast<const uint8_t*>(contents); 2119 size_t data_length = size; 1389 2120 1390 2121 // get size of prefix that does not need utf8 conversion 1391 size_t prefix_length = get_latin1_7bit_prefix_length(data, size);1392 assert(prefix_length <= size);2122 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2123 assert(prefix_length <= data_length); 1393 2124 1394 2125 const uint8_t* postfix = data + prefix_length; 1395 size_t postfix_length = size- prefix_length;2126 size_t postfix_length = data_length - prefix_length; 1396 2127 1397 2128 // if no conversion is needed, just return the original buffer … … 1399 2130 1400 2131 // first pass: get length in utf8 units 1401 out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);2132 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 1402 2133 1403 2134 // allocate buffer of suitable length 1404 out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length :1) * sizeof(char_t)));1405 if (! 
out_buffer) return false;2135 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2136 if (!buffer) return false; 1406 2137 1407 2138 // second pass: convert latin1 input to utf8 1408 memcpy(out_buffer, data, prefix_length); 1409 1410 uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer); 1411 uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length); 1412 1413 assert(out_end == out_begin + out_length); 1414 (void)!out_end; 2139 memcpy(buffer, data, prefix_length); 2140 2141 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2142 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2143 2144 assert(oend == obegin + length); 2145 *oend = 0; 2146 2147 out_buffer = buffer; 2148 out_length = length + 1; 1415 2149 1416 2150 return true; … … 1420 2154 { 1421 2155 // fast path: no conversion required 1422 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2156 if (encoding == encoding_utf8) 2157 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 1423 2158 1424 2159 // source encoding is utf16 … … 1428 2163 1429 2164 return (native_encoding == encoding) ? 1430 convert_buffer_ utf16(out_buffer, out_length, contents, size, opt_false()) :1431 convert_buffer_ utf16(out_buffer, out_length, contents, size, opt_true());2165 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2166 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 1432 2167 } 1433 2168 … … 1438 2173 1439 2174 return (native_encoding == encoding) ? 
1440 convert_buffer_ utf32(out_buffer, out_length, contents, size, opt_false()) :1441 convert_buffer_ utf32(out_buffer, out_length, contents, size, opt_true());2175 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2176 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 1442 2177 } 1443 2178 1444 2179 // source encoding is latin1 1445 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2180 if (encoding == encoding_latin1) 2181 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 1446 2182 1447 2183 assert(!"Invalid encoding"); … … 1453 2189 { 1454 2190 // get length in utf8 characters 1455 return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);2191 return wchar_decoder::process(str, length, 0, utf8_counter()); 1456 2192 } 1457 2193 … … 1460 2196 // convert to utf8 1461 2197 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 1462 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);2198 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 1463 2199 1464 2200 assert(begin + size == end); 1465 2201 (void)!end; 1466 1467 // zero-terminate 1468 buffer[size] = 0; 2202 (void)!size; 1469 2203 } 1470 2204 … … 1490 2224 1491 2225 // first pass: get length in wchar_t units 1492 size_t length = utf _decoder<wchar_counter>::decode_utf8_block(data, size, 0);2226 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 1493 2227 1494 2228 // allocate resulting string … … 1500 2234 { 1501 2235 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 1502 wchar_writer::value_type end = utf _decoder<wchar_writer>::decode_utf8_block(data, size, begin);2236 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 1503 2237 1504 2238 assert(begin + length == end); … … 1510 
2244 #endif 1511 2245 1512 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target) 1513 { 1514 assert(target); 2246 template <typename Header> 2247 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2248 { 2249 // never reuse shared memory 2250 if (header & xml_memory_page_contents_shared_mask) return false; 2251 1515 2252 size_t target_length = strlength(target); 1516 2253 1517 2254 // always reuse document buffer memory if possible 1518 if ( !allocated) return target_length >= length;2255 if ((header & header_mask) == 0) return target_length >= length; 1519 2256 1520 2257 // reuse heap memory if waste is not too great … … 1524 2261 } 1525 2262 1526 PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source) 1527 { 1528 size_t source_length = strlength(source); 1529 2263 template <typename String, typename Header> 2264 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) 2265 { 1530 2266 if (source_length == 0) 1531 2267 { 1532 2268 // empty string and null pointer are equivalent, so just deallocate old memory 1533 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;2269 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 1534 2270 1535 2271 if (header & header_mask) alloc->deallocate_string(dest); … … 1541 2277 return true; 1542 2278 } 1543 else if (dest && strcpy_insitu_allow(source_length, header &header_mask, dest))2279 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 1544 2280 { 1545 2281 // we can reuse old buffer, so just copy the new data (including zero terminator) 1546 memcpy(dest, source, (source_length + 1) * sizeof(char_t)); 2282 memcpy(dest, source, source_length * sizeof(char_t)); 2283 dest[source_length] = 0; 1547 2284 1548 2285 return true; … … 1550 
2287 else 1551 2288 { 1552 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator; 2289 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2290 2291 if (!alloc->reserve()) return false; 1553 2292 1554 2293 // allocate new buffer … … 1557 2296 1558 2297 // copy the string (including zero terminator) 1559 memcpy(buf, source, (source_length + 1) * sizeof(char_t)); 2298 memcpy(buf, source, source_length * sizeof(char_t)); 2299 buf[source_length] = 0; 1560 2300 1561 2301 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) … … 1654 2394 for (;;) 1655 2395 { 1656 if (static_cast<unsigned int>( ch- '0') <= 9)2396 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9) 1657 2397 ucsc = 10 * ucsc + (ch - '0'); 1658 2398 else if (ch == ';') … … 1752 2492 } 1753 2493 1754 // Utility macro for last character handling 1755 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2494 // Parser utilities 2495 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2496 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 2497 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2498 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2499 #define PUGI__POPNODE() { cursor = cursor->parent; } 2500 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2501 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2502 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2503 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2504 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, 
static_cast<char_t*>(0) 2505 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 1756 2506 1757 2507 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) … … 1761 2511 while (true) 1762 2512 { 1763 while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;2513 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 1764 2514 1765 2515 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair … … 1769 2519 if (*s == '\n') g.push(s, 1); 1770 2520 } 1771 else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here2521 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 1772 2522 { 1773 2523 *g.flush(s) = 0; … … 1789 2539 while (true) 1790 2540 { 1791 while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;2541 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 1792 2542 1793 2543 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair … … 1797 2547 if (*s == '\n') g.push(s, 1); 1798 2548 } 1799 else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here2549 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 1800 2550 { 1801 2551 *g.flush(s) = 0; … … 1813 2563 typedef char_t* (*strconv_pcdata_t)(char_t*); 1814 2564 1815 template <typename opt_ eol, typename opt_escape> struct strconv_pcdata_impl2565 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 1816 2566 { 1817 2567 static char_t* parse(char_t* s) 1818 2568 { 1819 2569 gap g; 1820 2570 2571 char_t* begin = s; 2572 1821 2573 while (true) 1822 2574 { 1823 while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;1824 2575 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2576 1825 2577 if (*s == '<') // PCDATA ends here 1826 2578 { 1827 *g.flush(s) = 0; 2579 char_t* end = g.flush(s); 2580 2581 if (opt_trim::value) 2582 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2583 --end; 2584 2585 *end = 0; 
1828 2586 1829 2587 return s + 1; … … 1841 2599 else if (*s == 0) 1842 2600 { 2601 char_t* end = g.flush(s); 2602 2603 if (opt_trim::value) 2604 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2605 --end; 2606 2607 *end = 0; 2608 1843 2609 return s; 1844 2610 } … … 1850 2616 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 1851 2617 { 1852 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20); 1853 1854 switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes) 1855 { 1856 case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse; 1857 case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse; 1858 case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse; 1859 case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse; 1860 default: return 0; // should not get here 2618 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2619 2620 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) 2621 { 2622 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2623 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2624 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2625 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2626 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2627 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2628 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2629 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2630 default: assert(false); return 0; // should not get here 1861 2631 } 1862 2632 } … … 1883 2653 while (true) 1884 2654 { 1885 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;2655 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 1886 2656 1887 2657 if (*s 
== end_quote) … … 1924 2694 while (true) 1925 2695 { 1926 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;2696 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 1927 2697 1928 2698 if (*s == end_quote) … … 1960 2730 while (true) 1961 2731 { 1962 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;2732 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 1963 2733 1964 2734 if (*s == end_quote) … … 1992 2762 while (true) 1993 2763 { 1994 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;2764 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 1995 2765 1996 2766 if (*s == end_quote) … … 2035 2805 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2036 2806 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2037 default: return 0; // should not get here2807 default: assert(false); return 0; // should not get here 2038 2808 } 2039 2809 } … … 2051 2821 { 2052 2822 xml_allocator alloc; 2823 xml_allocator* alloc_state; 2053 2824 char_t* error_offset; 2054 2825 xml_parse_status error_status; 2055 2826 2056 // Parser utilities. 
2057 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 2058 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2059 #define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2060 #define PUGI__POPNODE() { cursor = cursor->parent; } 2061 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2062 #define PUGI__SCANWHILE(X) { while ((X)) ++s; } 2063 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2064 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2065 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2066 2067 xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) 2068 { 2827 xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) 2828 { 2829 } 2830 2831 ~xml_parser() 2832 { 2833 *alloc_state = alloc; 2069 2834 } 2070 2835 … … 2102 2867 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2103 2868 2104 s += 4;2869 s += 3; 2105 2870 } 2106 2871 else PUGI__THROW_ERROR(status_bad_doctype, s); … … 2111 2876 char_t* parse_doctype_ignore(char_t* s) 2112 2877 { 2878 size_t depth = 0; 2879 2113 2880 assert(s[0] == '<' && s[1] == '!' 
&& s[2] == '['); 2114 s ++;2881 s += 3; 2115 2882 2116 2883 while (*s) … … 2119 2886 { 2120 2887 // nested ignore section 2121 s = parse_doctype_ignore(s);2122 if (!s) return s;2888 s += 3; 2889 depth++; 2123 2890 } 2124 2891 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') … … 2127 2894 s += 3; 2128 2895 2129 return s; 2896 if (depth == 0) 2897 return s; 2898 2899 depth--; 2130 2900 } 2131 2901 else s++; … … 2135 2905 } 2136 2906 2137 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel) 2138 { 2139 assert(s[0] == '<' && s[1] == '!'); 2140 s++; 2907 char_t* parse_doctype_group(char_t* s, char_t endch) 2908 { 2909 size_t depth = 0; 2910 2911 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2912 s += 2; 2141 2913 2142 2914 while (*s) … … 2153 2925 { 2154 2926 // some control group 2155 s = parse_doctype_group(s, endch, false);2156 if (!s) return s;2927 s += 2; 2928 depth++; 2157 2929 } 2158 2930 } … … 2165 2937 else if (*s == '>') 2166 2938 { 2939 if (depth == 0) 2940 return s; 2941 2942 depth--; 2167 2943 s++; 2168 2169 return s;2170 2944 } 2171 2945 else s++; 2172 2946 } 2173 2947 2174 if ( !toplevel|| endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);2948 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 2175 2949 2176 2950 return s; … … 2205 2979 { 2206 2980 // Scan for terminating '-->'. 2207 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));2981 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 2208 2982 PUGI__CHECK_ERROR(status_bad_comment, s); 2209 2983 … … 2237 3011 { 2238 3012 // Scan for terminating ']]>'. 2239 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));3013 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 2240 3014 PUGI__CHECK_ERROR(status_bad_cdata, s); 2241 3015 … … 2246 3020 { 2247 3021 // Scan for terminating ']]>'. 
2248 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));3022 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 2249 3023 PUGI__CHECK_ERROR(status_bad_cdata, s); 2250 3024 … … 2256 3030 else PUGI__THROW_ERROR(status_bad_cdata, s); 2257 3031 } 2258 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))3032 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) 2259 3033 { 2260 3034 s -= 2; … … 2264 3038 char_t* mark = s + 9; 2265 3039 2266 s = parse_doctype_group(s, endch , true);3040 s = parse_doctype_group(s, endch); 2267 3041 if (!s) return s; 2268 3042 3043 assert((*s == 0 && endch == '>') || *s == '>'); 3044 if (*s) *s++ = 0; 3045 2269 3046 if (PUGI__OPTSET(parse_doctype)) 2270 3047 { … … 2274 3051 2275 3052 cursor->value = mark; 2276 2277 assert((s[0] == 0 && endch == '>') || s[-1] == '>');2278 s[*s == 0 ? 0 : -1] = 0;2279 2280 PUGI__POPNODE();2281 3053 } 2282 3054 } … … 2330 3102 { 2331 3103 // empty node 2332 if (! ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);3104 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); 2333 3105 s += (*s == '>'); 2334 3106 … … 2342 3114 char_t* value = s; 2343 3115 2344 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));3116 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 2345 3117 PUGI__CHECK_ERROR(status_bad_pi, s); 2346 3118 … … 2357 3129 // store value and step over > 2358 3130 cursor->value = value; 3131 2359 3132 PUGI__POPNODE(); 2360 3133 … … 2369 3142 { 2370 3143 // scan for tag end 2371 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));3144 PUGI__SCANFOR(s[0] == '?' 
&& PUGI__ENDSWITH(s[1], '>')); 2372 3145 PUGI__CHECK_ERROR(status_bad_pi, s); 2373 3146 … … 2381 3154 } 2382 3155 2383 char_t* parse (char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)3156 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) 2384 3157 { 2385 3158 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); … … 2387 3160 2388 3161 char_t ch = 0; 2389 xml_node_struct* cursor = xmldoc;3162 xml_node_struct* cursor = root; 2390 3163 char_t* mark = s; 2391 3164 … … 2403 3176 cursor->name = s; 2404 3177 2405 PUGI__SCANWHILE (PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.3178 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 2406 3179 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 2407 3180 … … 2419 3192 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... 2420 3193 { 2421 xml_attribute_struct* a = append_ attribute_ll(cursor, alloc); // Make space for this attribute.3194 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. 2422 3195 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); 2423 3196 2424 3197 a->name = s; // Save the offset. 2425 3198 2426 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. 2427 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance 2428 3199 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 2429 3200 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 2430 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance2431 3201 2432 3202 if (PUGI__IS_CHARTYPE(ch, ct_space)) 2433 3203 { 2434 3204 PUGI__SKIPWS(); // Eat any whitespace. 2435 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance2436 3205 2437 3206 ch = *s; … … 2496 3265 else if (ch == '/') // '<#.../' 2497 3266 { 2498 if (! 
ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);3267 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); 2499 3268 2500 3269 PUGI__POPNODE(); // Pop. … … 2549 3318 2550 3319 assert(cursor); 2551 if ( (cursor->header & xml_memory_page_type_mask) + 1== node_declaration) goto LOC_ATTRIBUTES;3320 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; 2552 3321 } 2553 3322 else if (*s == '!') // '<!...' … … 2565 3334 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. 2566 3335 2567 if (*s == '<' )3336 if (*s == '<' || !*s) 2568 3337 { 2569 3338 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one 2570 3339 assert(mark != s); 2571 3340 2572 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) )3341 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) 2573 3342 { 2574 3343 continue; … … 2576 3345 else if (PUGI__OPTSET(parse_ws_pcdata_single)) 2577 3346 { 2578 if (s[ 1] != '/' || cursor->first_child) continue;3347 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; 2579 3348 } 2580 3349 } 2581 3350 2582 s = mark; 3351 if (!PUGI__OPTSET(parse_trim_pcdata)) 3352 s = mark; 2583 3353 2584 if (cursor->parent )3354 if (cursor->parent || PUGI__OPTSET(parse_fragment)) 2585 3355 { 2586 3356 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. 
… … 2607 3377 2608 3378 // check that last tag is closed 2609 if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);3379 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); 2610 3380 2611 3381 return s; 2612 3382 } 2613 3383 2614 static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk) 2615 { 2616 xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root); 2617 2618 // store buffer for offset_debug 2619 xmldoc->buffer = buffer; 2620 3384 #ifdef PUGIXML_WCHAR_MODE 3385 static char_t* parse_skip_bom(char_t* s) 3386 { 3387 unsigned int bom = 0xfeff; 3388 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; 3389 } 3390 #else 3391 static char_t* parse_skip_bom(char_t* s) 3392 { 3393 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; 3394 } 3395 #endif 3396 3397 static bool has_element_node_siblings(xml_node_struct* node) 3398 { 3399 while (node) 3400 { 3401 if (PUGI__NODETYPE(node) == node_element) return true; 3402 3403 node = node->next_sibling; 3404 } 3405 3406 return false; 3407 } 3408 3409 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3410 { 2621 3411 // early-out for empty documents 2622 if (length == 0) return make_parse_result(status_ok); 2623 3412 if (length == 0) 3413 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3414 3415 // get last child of the root before parsing 3416 xml_node_struct* last_root_child = root->first_child ? 
root->first_child->prev_sibling_c + 0 : 0; 3417 2624 3418 // create parser on stack 2625 xml_parser parser( *xmldoc);3419 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 2626 3420 2627 3421 // save last character and make buffer zero-terminated (speeds up parsing) … … 2629 3423 buffer[length - 1] = 0; 2630 3424 3425 // skip BOM to make sure it does not end up as part of parse output 3426 char_t* buffer_data = parse_skip_bom(buffer); 3427 2631 3428 // perform actual parsing 2632 parser.parse (buffer, xmldoc, optmsk, endch);3429 parser.parse_tree(buffer_data, root, optmsk, endch); 2633 3430 2634 3431 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 2635 3432 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 2636 3433 2637 // update allocator state 2638 *static_cast<xml_allocator*>(xmldoc) = parser.alloc; 2639 2640 // since we removed last character, we have to handle the only possible false positive 2641 if (result && endch == '<') 2642 { 2643 // there's no possible well-formed document with < at the end 2644 return make_parse_result(status_unrecognized_tag, length); 3434 if (result) 3435 { 3436 // since we removed last character, we have to handle the only possible false positive (stray <) 3437 if (endch == '<') 3438 return make_parse_result(status_unrecognized_tag, length - 1); 3439 3440 // check if there are any element nodes parsed 3441 xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; 3442 3443 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3444 return make_parse_result(status_no_document_element, length - 1); 3445 } 3446 else 3447 { 3448 // roll back offset if it occurs on a null terminator in the source buffer 3449 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3450 result.offset--; 2645 3451 } 2646 3452 … … 2677 3483 } 2678 3484 3485 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3486 { 3487 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3488 3489 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3490 3491 return static_cast<size_t>(end - dest) * sizeof(*dest); 3492 } 3493 3494 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3495 { 3496 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3497 3498 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3499 3500 if (opt_swap) 3501 { 3502 for (typename T::value_type i = dest; i != end; ++i) 3503 *i = endian_swap(*i); 3504 } 3505 3506 return static_cast<size_t>(end - dest) * sizeof(*dest); 3507 } 3508 2679 3509 #ifdef PUGIXML_WCHAR_MODE 2680 3510 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 2681 3511 { 2682 assert(length > 0);3512 if (length < 1) return 0; 2683 3513 2684 3514 // discard last character if it's the lead of a surrogate pair … … 2686 3516 } 2687 3517 2688 PUGI__FN size_t convert_buffer (char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)3518 PUGI__FN size_t convert_buffer_output(char_t* 
r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 2689 3519 { 2690 3520 // only endian-swapping is required … … 2698 3528 // convert to utf8 2699 3529 if (encoding == encoding_utf8) 2700 { 2701 uint8_t* dest = r_u8; 2702 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest); 2703 2704 return static_cast<size_t>(end - dest); 2705 } 3530 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 2706 3531 2707 3532 // convert to utf16 2708 3533 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2709 3534 { 2710 uint16_t* dest = r_u16;2711 2712 // convert to native utf162713 uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);2714 2715 // swap if necessary2716 3535 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2717 3536 2718 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); 2719 2720 return static_cast<size_t>(end - dest) * sizeof(uint16_t); 3537 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 2721 3538 } 2722 3539 … … 2724 3541 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2725 3542 { 2726 uint32_t* dest = r_u32;2727 2728 // convert to native utf322729 uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);2730 2731 // swap if necessary2732 3543 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 2733 3544 2734 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); 2735 2736 return static_cast<size_t>(end - dest) * sizeof(uint32_t); 3545 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 2737 3546 } 2738 3547 2739 3548 // convert to latin1 2740 3549 if (encoding == encoding_latin1) 2741 { 2742 uint8_t* dest = r_u8; 2743 uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest); 2744 2745 return static_cast<size_t>(end - dest); 2746 } 3550 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 2747 3551 2748 3552 assert(!"Invalid encoding"); … … 2752 3556 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 2753 3557 { 2754 assert(length > 4);3558 if (length < 5) return 0; 2755 3559 2756 3560 for (size_t i = 1; i <= 4; ++i) … … 2766 3570 } 2767 3571 2768 PUGI__FN size_t convert_buffer (char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)3572 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 2769 3573 { 2770 3574 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2771 3575 { 2772 uint16_t* dest = r_u16;2773 2774 // convert to native utf162775 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);2776 2777 // swap if necessary2778 3576 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2779 3577 2780 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); 2781 2782 return static_cast<size_t>(end - dest) * sizeof(uint16_t); 3578 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 2783 3579 } 2784 3580 2785 3581 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2786 3582 { 2787 uint32_t* dest = r_u32;2788 2789 // convert to native utf322790 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);2791 2792 // swap if necessary2793 3583 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2794 3584 2795 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest)); 2796 2797 return static_cast<size_t>(end - dest) * sizeof(uint32_t); 3585 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 2798 3586 } 2799 3587 2800 3588 if (encoding == encoding_latin1) 2801 { 2802 uint8_t* dest = r_u8; 2803 uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest); 2804 2805 return static_cast<size_t>(end - dest); 2806 } 3589 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 2807 3590 2808 3591 assert(!"Invalid encoding"); … … 2822 3605 } 2823 3606 2824 ~xml_buffered_writer() 2825 { 2826 flush(); 2827 } 2828 2829 void flush() 3607 size_t flush() 2830 3608 { 2831 3609 flush(buffer, bufsize); 2832 3610 bufsize = 0; 3611 return 0; 2833 3612 } 2834 3613 … … 2843 3622 { 2844 3623 // convert chunk 2845 size_t result = convert_buffer (scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);3624 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, 
scratch.data_u16, scratch.data_u32, data, size, encoding); 2846 3625 assert(result <= sizeof(scratch)); 2847 3626 … … 2851 3630 } 2852 3631 2853 void write(const char_t* data, size_t length) 2854 { 2855 if (bufsize + length > bufcapacity) 2856 { 2857 // flush the remaining buffer contents 2858 flush(); 2859 2860 // handle large chunks 2861 if (length > bufcapacity) 2862 { 2863 if (encoding == get_write_native_encoding()) 2864 { 2865 // fast path, can just write data chunk 2866 writer.write(data, length * sizeof(char_t)); 2867 return; 2868 } 2869 2870 // need to convert in suitable chunks 2871 while (length > bufcapacity) 2872 { 2873 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 2874 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 2875 size_t chunk_size = get_valid_length(data, bufcapacity); 2876 2877 // convert chunk and write 2878 flush(data, chunk_size); 2879 2880 // iterate 2881 data += chunk_size; 2882 length -= chunk_size; 2883 } 2884 2885 // small tail is copied below 2886 bufsize = 0; 2887 } 3632 void write_direct(const char_t* data, size_t length) 3633 { 3634 // flush the remaining buffer contents 3635 flush(); 3636 3637 // handle large chunks 3638 if (length > bufcapacity) 3639 { 3640 if (encoding == get_write_native_encoding()) 3641 { 3642 // fast path, can just write data chunk 3643 writer.write(data, length * sizeof(char_t)); 3644 return; 3645 } 3646 3647 // need to convert in suitable chunks 3648 while (length > bufcapacity) 3649 { 3650 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3651 // and form a complete codepoint sequence (i.e. 
discard start of last codepoint if necessary) 3652 size_t chunk_size = get_valid_length(data, bufcapacity); 3653 assert(chunk_size); 3654 3655 // convert chunk and write 3656 flush(data, chunk_size); 3657 3658 // iterate 3659 data += chunk_size; 3660 length -= chunk_size; 3661 } 3662 3663 // small tail is copied below 3664 bufsize = 0; 2888 3665 } 2889 3666 … … 2892 3669 } 2893 3670 2894 void write(const char_t* data) 2895 { 2896 write(data, strlength(data)); 3671 void write_buffer(const char_t* data, size_t length) 3672 { 3673 size_t offset = bufsize; 3674 3675 if (offset + length <= bufcapacity) 3676 { 3677 memcpy(buffer + offset, data, length * sizeof(char_t)); 3678 bufsize = offset + length; 3679 } 3680 else 3681 { 3682 write_direct(data, length); 3683 } 3684 } 3685 3686 void write_string(const char_t* data) 3687 { 3688 // write the part of the string that fits in the buffer 3689 size_t offset = bufsize; 3690 3691 while (*data && offset < bufcapacity) 3692 buffer[offset++] = *data++; 3693 3694 // write the rest 3695 if (offset < bufcapacity) 3696 { 3697 bufsize = offset; 3698 } 3699 else 3700 { 3701 // backtrack a bit if we have split the codepoint 3702 size_t length = offset - bufsize; 3703 size_t extra = length - get_valid_length(data - length, length); 3704 3705 bufsize = offset - extra; 3706 3707 write_direct(data - extra, strlength(data) + extra); 3708 } 2897 3709 } 2898 3710 2899 3711 void write(char_t d0) 2900 3712 { 2901 if (bufsize + 1 > bufcapacity) flush(); 2902 2903 buffer[bufsize + 0] = d0; 2904 bufsize += 1; 3713 size_t offset = bufsize; 3714 if (offset > bufcapacity - 1) offset = flush(); 3715 3716 buffer[offset + 0] = d0; 3717 bufsize = offset + 1; 2905 3718 } 2906 3719 2907 3720 void write(char_t d0, char_t d1) 2908 3721 { 2909 if (bufsize + 2 > bufcapacity) flush(); 2910 2911 buffer[bufsize + 0] = d0; 2912 buffer[bufsize + 1] = d1; 2913 bufsize += 2; 3722 size_t offset = bufsize; 3723 if (offset > bufcapacity - 2) offset = flush(); 3724 3725 
buffer[offset + 0] = d0; 3726 buffer[offset + 1] = d1; 3727 bufsize = offset + 2; 2914 3728 } 2915 3729 2916 3730 void write(char_t d0, char_t d1, char_t d2) 2917 3731 { 2918 if (bufsize + 3 > bufcapacity) flush(); 2919 2920 buffer[bufsize + 0] = d0; 2921 buffer[bufsize + 1] = d1; 2922 buffer[bufsize + 2] = d2; 2923 bufsize += 3; 3732 size_t offset = bufsize; 3733 if (offset > bufcapacity - 3) offset = flush(); 3734 3735 buffer[offset + 0] = d0; 3736 buffer[offset + 1] = d1; 3737 buffer[offset + 2] = d2; 3738 bufsize = offset + 3; 2924 3739 } 2925 3740 2926 3741 void write(char_t d0, char_t d1, char_t d2, char_t d3) 2927 3742 { 2928 if (bufsize + 4 > bufcapacity) flush(); 2929 2930 buffer[bufsize + 0] = d0; 2931 buffer[bufsize + 1] = d1; 2932 buffer[bufsize + 2] = d2; 2933 buffer[bufsize + 3] = d3; 2934 bufsize += 4; 3743 size_t offset = bufsize; 3744 if (offset > bufcapacity - 4) offset = flush(); 3745 3746 buffer[offset + 0] = d0; 3747 buffer[offset + 1] = d1; 3748 buffer[offset + 2] = d2; 3749 buffer[offset + 3] = d3; 3750 bufsize = offset + 4; 2935 3751 } 2936 3752 2937 3753 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 2938 3754 { 2939 if (bufsize + 5 > bufcapacity) flush(); 2940 2941 buffer[bufsize + 0] = d0; 2942 buffer[bufsize + 1] = d1; 2943 buffer[bufsize + 2] = d2; 2944 buffer[bufsize + 3] = d3; 2945 buffer[bufsize + 4] = d4; 2946 bufsize += 5; 3755 size_t offset = bufsize; 3756 if (offset > bufcapacity - 5) offset = flush(); 3757 3758 buffer[offset + 0] = d0; 3759 buffer[offset + 1] = d1; 3760 buffer[offset + 2] = d2; 3761 buffer[offset + 3] = d3; 3762 buffer[offset + 4] = d4; 3763 bufsize = offset + 5; 2947 3764 } 2948 3765 2949 3766 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 2950 3767 { 2951 if (bufsize + 6 > bufcapacity) flush(); 2952 2953 buffer[bufsize + 0] = d0; 2954 buffer[bufsize + 1] = d1; 2955 buffer[bufsize + 2] = d2; 2956 buffer[bufsize + 3] = d3; 2957 buffer[bufsize + 4] = d4; 2958 
buffer[bufsize + 5] = d5; 2959 bufsize += 6; 3768 size_t offset = bufsize; 3769 if (offset > bufcapacity - 6) offset = flush(); 3770 3771 buffer[offset + 0] = d0; 3772 buffer[offset + 1] = d1; 3773 buffer[offset + 2] = d2; 3774 buffer[offset + 3] = d3; 3775 buffer[offset + 4] = d4; 3776 buffer[offset + 5] = d5; 3777 bufsize = offset + 6; 2960 3778 } 2961 3779 … … 2997 3815 2998 3816 // While *s is a usual symbol 2999 while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;3817 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3000 3818 3001 writer.write (prev, static_cast<size_t>(s - prev));3819 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3002 3820 3003 3821 switch (*s) … … 3034 3852 { 3035 3853 if (flags & format_no_escapes) 3036 writer.write (s);3854 writer.write_string(s); 3037 3855 else 3038 3856 text_output_escaped(writer, s, type); … … 3054 3872 if (*s) s += 2; 3055 3873 3056 writer.write (prev, static_cast<size_t>(s - prev));3874 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3057 3875 3058 3876 writer.write(']', ']', '>'); … … 3061 3879 } 3062 3880 3063 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags) 3881 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3882 { 3883 switch (indent_length) 3884 { 3885 case 1: 3886 { 3887 for (unsigned int i = 0; i < depth; ++i) 3888 writer.write(indent[0]); 3889 break; 3890 } 3891 3892 case 2: 3893 { 3894 for (unsigned int i = 0; i < depth; ++i) 3895 writer.write(indent[0], indent[1]); 3896 break; 3897 } 3898 3899 case 3: 3900 { 3901 for (unsigned int i = 0; i < depth; ++i) 3902 writer.write(indent[0], indent[1], indent[2]); 3903 break; 3904 } 3905 3906 case 4: 3907 { 3908 for (unsigned int i = 0; i < depth; ++i) 3909 writer.write(indent[0], indent[1], indent[2], indent[3]); 3910 break; 3911 } 3912 3913 default: 3914 { 3915 for (unsigned int i = 0; i < depth; ++i) 3916 
writer.write_buffer(indent, indent_length); 3917 } 3918 } 3919 } 3920 3921 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 3922 { 3923 writer.write('<', '!', '-', '-'); 3924 3925 while (*s) 3926 { 3927 const char_t* prev = s; 3928 3929 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 3930 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 3931 3932 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3933 3934 if (*s) 3935 { 3936 assert(*s == '-'); 3937 3938 writer.write('-', ' '); 3939 ++s; 3940 } 3941 } 3942 3943 writer.write('-', '-', '>'); 3944 } 3945 3946 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 3947 { 3948 while (*s) 3949 { 3950 const char_t* prev = s; 3951 3952 // look for ?> sequence - we can't output it since ?> terminates PI 3953 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 3954 3955 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3956 3957 if (*s) 3958 { 3959 assert(s[0] == '?' && s[1] == '>'); 3960 3961 writer.write('?', ' ', '>'); 3962 s += 2; 3963 } 3964 } 3965 } 3966 3967 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 3064 3968 { 3065 3969 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 3066 3970 3067 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute()) 3068 { 3069 writer.write(' '); 3070 writer.write(a.name()[0] ? a.name() : default_name); 3971 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 3972 { 3973 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 3974 { 3975 writer.write('\n'); 3976 3977 text_output_indent(writer, indent, indent_length, depth + 1); 3978 } 3979 else 3980 { 3981 writer.write(' '); 3982 } 3983 3984 writer.write_string(a->name ? 
a->name + 0 : default_name); 3071 3985 writer.write('=', '"'); 3072 3986 3073 text_output(writer, a.value(), ctx_special_attr, flags); 3987 if (a->value) 3988 text_output(writer, a->value, ctx_special_attr, flags); 3074 3989 3075 3990 writer.write('"'); … … 3077 3992 } 3078 3993 3079 PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)3994 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 3080 3995 { 3081 3996 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 3082 3083 if ((flags & format_indent) != 0 && (flags & format_raw) == 0) 3084 for (unsigned int i = 0; i < depth; ++i) writer.write(indent); 3085 3086 switch (node.type()) 3087 { 3088 case node_document: 3089 { 3090 for (xml_node n = node.first_child(); n; n = n.next_sibling()) 3091 node_output(writer, n, indent, flags, depth); 3092 break; 3093 } 3094 3095 case node_element: 3096 { 3097 const char_t* name = node.name()[0] ? node.name() : default_name; 3098 3099 writer.write('<'); 3100 writer.write(name); 3101 3102 node_output_attributes(writer, node, flags); 3103 3104 if (flags & format_raw) 3105 { 3106 if (!node.first_child()) 3107 writer.write(' ', '/', '>'); 3997 const char_t* name = node->name ? node->name + 0 : default_name; 3998 3999 writer.write('<'); 4000 writer.write_string(name); 4001 4002 if (node->first_attribute) 4003 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4004 4005 if (!node->first_child) 4006 { 4007 writer.write(' ', '/', '>'); 4008 4009 return false; 4010 } 4011 else 4012 { 4013 writer.write('>'); 4014 4015 return true; 4016 } 4017 } 4018 4019 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4020 { 4021 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4022 const char_t* name = node->name ? 
node->name + 0 : default_name; 4023 4024 writer.write('<', '/'); 4025 writer.write_string(name); 4026 writer.write('>'); 4027 } 4028 4029 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) 4030 { 4031 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4032 4033 switch (PUGI__NODETYPE(node)) 4034 { 4035 case node_pcdata: 4036 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); 4037 break; 4038 4039 case node_cdata: 4040 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4041 break; 4042 4043 case node_comment: 4044 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4045 break; 4046 4047 case node_pi: 4048 writer.write('<', '?'); 4049 writer.write_string(node->name ? node->name + 0 : default_name); 4050 4051 if (node->value) 4052 { 4053 writer.write(' '); 4054 node_output_pi_value(writer, node->value); 4055 } 4056 4057 writer.write('?', '>'); 4058 break; 4059 4060 case node_declaration: 4061 writer.write('<', '?'); 4062 writer.write_string(node->name ? node->name + 0 : default_name); 4063 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); 4064 writer.write('?', '>'); 4065 break; 4066 4067 case node_doctype: 4068 writer.write('<', '!', 'D', 'O', 'C'); 4069 writer.write('T', 'Y', 'P', 'E'); 4070 4071 if (node->value) 4072 { 4073 writer.write(' '); 4074 writer.write_string(node->value); 4075 } 4076 4077 writer.write('>'); 4078 break; 4079 4080 default: 4081 assert(!"Invalid node type"); 4082 } 4083 } 4084 4085 enum indent_flags_t 4086 { 4087 indent_newline = 1, 4088 indent_indent = 2 4089 }; 4090 4091 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) 4092 { 4093 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? 
strlength(indent) : 0; 4094 unsigned int indent_flags = indent_indent; 4095 4096 xml_node_struct* node = root; 4097 4098 do 4099 { 4100 assert(node); 4101 4102 // begin writing current node 4103 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) 4104 { 4105 node_output_simple(writer, node, flags); 4106 4107 indent_flags = 0; 4108 } 4109 else 4110 { 4111 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4112 writer.write('\n'); 4113 4114 if ((indent_flags & indent_indent) && indent_length) 4115 text_output_indent(writer, indent, indent_length, depth); 4116 4117 if (PUGI__NODETYPE(node) == node_element) 4118 { 4119 indent_flags = indent_newline | indent_indent; 4120 4121 if (node_output_start(writer, node, indent, indent_length, flags, depth)) 4122 { 4123 node = node->first_child; 4124 depth++; 4125 continue; 4126 } 4127 } 4128 else if (PUGI__NODETYPE(node) == node_document) 4129 { 4130 indent_flags = indent_indent; 4131 4132 if (node->first_child) 4133 { 4134 node = node->first_child; 4135 continue; 4136 } 4137 } 3108 4138 else 3109 4139 { 3110 writer.write('>'); 3111 3112 for (xml_node n = node.first_child(); n; n = n.next_sibling()) 3113 node_output(writer, n, indent, flags, depth + 1); 3114 3115 writer.write('<', '/'); 3116 writer.write(name); 3117 writer.write('>'); 3118 } 3119 } 3120 else if (!node.first_child()) 3121 writer.write(' ', '/', '>', '\n'); 3122 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata)) 3123 { 3124 writer.write('>'); 3125 3126 if (node.first_child().type() == node_pcdata) 3127 text_output(writer, node.first_child().value(), ctx_special_pcdata, flags); 3128 else 3129 text_output_cdata(writer, node.first_child().value()); 3130 3131 writer.write('<', '/'); 3132 writer.write(name); 3133 writer.write('>', '\n'); 3134 } 3135 else 3136 { 3137 writer.write('>', '\n'); 3138 3139 for (xml_node n = node.first_child(); n; 
n = n.next_sibling()) 3140 node_output(writer, n, indent, flags, depth + 1); 3141 3142 if ((flags & format_indent) != 0 && (flags & format_raw) == 0) 3143 for (unsigned int i = 0; i < depth; ++i) writer.write(indent); 3144 3145 writer.write('<', '/'); 3146 writer.write(name); 3147 writer.write('>', '\n'); 3148 } 3149 3150 break; 3151 } 3152 3153 case node_pcdata: 3154 text_output(writer, node.value(), ctx_special_pcdata, flags); 3155 if ((flags & format_raw) == 0) writer.write('\n'); 3156 break; 3157 3158 case node_cdata: 3159 text_output_cdata(writer, node.value()); 3160 if ((flags & format_raw) == 0) writer.write('\n'); 3161 break; 3162 3163 case node_comment: 3164 writer.write('<', '!', '-', '-'); 3165 writer.write(node.value()); 3166 writer.write('-', '-', '>'); 3167 if ((flags & format_raw) == 0) writer.write('\n'); 3168 break; 3169 3170 case node_pi: 3171 case node_declaration: 3172 writer.write('<', '?'); 3173 writer.write(node.name()[0] ? node.name() : default_name); 3174 3175 if (node.type() == node_declaration) 3176 { 3177 node_output_attributes(writer, node, flags); 3178 } 3179 else if (node.value()[0]) 3180 { 3181 writer.write(' '); 3182 writer.write(node.value()); 3183 } 3184 3185 writer.write('?', '>'); 3186 if ((flags & format_raw) == 0) writer.write('\n'); 3187 break; 3188 3189 case node_doctype: 3190 writer.write('<', '!', 'D', 'O', 'C'); 3191 writer.write('T', 'Y', 'P', 'E'); 3192 3193 if (node.value()[0]) 3194 { 3195 writer.write(' '); 3196 writer.write(node.value()); 3197 } 3198 3199 writer.write('>'); 3200 if ((flags & format_raw) == 0) writer.write('\n'); 3201 break; 3202 3203 default: 3204 assert(!"Invalid node type"); 3205 } 3206 } 3207 3208 inline bool has_declaration(const xml_node& node) 3209 { 3210 for (xml_node child = node.first_child(); child; child = child.next_sibling()) 3211 { 3212 xml_node_type type = child.type(); 4140 node_output_simple(writer, node, flags); 4141 4142 indent_flags = indent_newline | indent_indent; 4143 } 4144 } 
4145 4146 // continue to the next node 4147 while (node != root) 4148 { 4149 if (node->next_sibling) 4150 { 4151 node = node->next_sibling; 4152 break; 4153 } 4154 4155 node = node->parent; 4156 4157 // write closing node 4158 if (PUGI__NODETYPE(node) == node_element) 4159 { 4160 depth--; 4161 4162 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4163 writer.write('\n'); 4164 4165 if ((indent_flags & indent_indent) && indent_length) 4166 text_output_indent(writer, indent, indent_length, depth); 4167 4168 node_output_end(writer, node); 4169 4170 indent_flags = indent_newline | indent_indent; 4171 } 4172 } 4173 } 4174 while (node != root); 4175 4176 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4177 writer.write('\n'); 4178 } 4179 4180 PUGI__FN bool has_declaration(xml_node_struct* node) 4181 { 4182 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) 4183 { 4184 xml_node_type type = PUGI__NODETYPE(child); 3213 4185 3214 4186 if (type == node_declaration) return true; … … 3219 4191 } 3220 4192 3221 inline bool allow_insert_child(xml_node_type parent, xml_node_type child) 4193 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) 4194 { 4195 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4196 if (a == attr) 4197 return true; 4198 4199 return false; 4200 } 4201 4202 PUGI__FN bool allow_insert_attribute(xml_node_type parent) 4203 { 4204 return parent == node_element || parent == node_declaration; 4205 } 4206 4207 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 3222 4208 { 3223 4209 if (parent != node_document && parent != node_element) return false; … … 3228 4214 } 3229 4215 3230 PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip) 3231 { 3232 assert(dest.type() == source.type()); 3233 3234 switch (source.type()) 3235 { 3236 case node_element: 3237 { 3238 
dest.set_name(source.name()); 3239 3240 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) 3241 dest.append_attribute(a.name()).set_value(a.value()); 3242 3243 for (xml_node c = source.first_child(); c; c = c.next_sibling()) 3244 { 3245 if (c == skip) continue; 3246 3247 xml_node cc = dest.append_child(c.type()); 3248 assert(cc); 3249 3250 recursive_copy_skip(cc, c, skip); 3251 } 3252 3253 break; 3254 } 3255 3256 case node_pcdata: 3257 case node_cdata: 3258 case node_comment: 3259 case node_doctype: 3260 dest.set_value(source.value()); 3261 break; 3262 3263 case node_pi: 3264 dest.set_name(source.name()); 3265 dest.set_value(source.value()); 3266 break; 3267 3268 case node_declaration: 3269 { 3270 dest.set_name(source.name()); 3271 3272 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute()) 3273 dest.append_attribute(a.name()).set_value(a.value()); 3274 3275 break; 3276 } 3277 3278 default: 3279 assert(!"Invalid node type"); 3280 } 4216 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4217 { 4218 // check that child can be a child of parent 4219 if (!allow_insert_child(parent.type(), child.type())) 4220 return false; 4221 4222 // check that node is not moved between documents 4223 if (parent.root() != child.root()) 4224 return false; 4225 4226 // check that new parent is not in the child subtree 4227 xml_node cur = parent; 4228 4229 while (cur) 4230 { 4231 if (cur == child) 4232 return false; 4233 4234 cur = cur.parent(); 4235 } 4236 4237 return true; 4238 } 4239 4240 template <typename String, typename Header> 4241 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4242 { 4243 assert(!dest && (header & header_mask) == 0); 4244 4245 if (source) 4246 { 4247 if (alloc && (source_header & header_mask) == 0) 4248 { 4249 dest = source; 4250 4251 // since strcpy_insitu can reuse document buffer memory we need to mark both 
source and dest as shared 4252 header |= xml_memory_page_contents_shared_mask; 4253 source_header |= xml_memory_page_contents_shared_mask; 4254 } 4255 else 4256 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4257 } 4258 } 4259 4260 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4261 { 4262 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4263 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4264 4265 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4266 { 4267 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4268 4269 if (da) 4270 { 4271 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4272 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4273 } 4274 } 4275 } 4276 4277 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4278 { 4279 xml_allocator& alloc = get_allocator(dn); 4280 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; 4281 4282 node_copy_contents(dn, sn, shared_alloc); 4283 4284 xml_node_struct* dit = dn; 4285 xml_node_struct* sit = sn->first_child; 4286 4287 while (sit && sit != sn) 4288 { 4289 if (sit != dn) 4290 { 4291 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4292 4293 if (copy) 4294 { 4295 node_copy_contents(copy, sit, shared_alloc); 4296 4297 if (sit->first_child) 4298 { 4299 dit = copy; 4300 sit = sit->first_child; 4301 continue; 4302 } 4303 } 4304 } 4305 4306 // continue to the next node 4307 do 4308 { 4309 if (sit->next_sibling) 4310 { 4311 sit = sit->next_sibling; 4312 break; 4313 } 4314 4315 sit = sit->parent; 4316 dit = dit->parent; 4317 } 4318 while (sit != sn); 4319 } 4320 } 4321 4322 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4323 { 4324 xml_allocator& alloc = get_allocator(da); 4325 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; 4326 4327 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4328 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 3281 4329 } 3282 4330 3283 4331 inline bool is_text_node(xml_node_struct* node) 3284 4332 { 3285 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);4333 xml_node_type type = PUGI__NODETYPE(node); 3286 4334 3287 4335 return type == node_pcdata || type == node_cdata; … … 3289 4337 3290 4338 // get value with conversion functions 3291 PUGI__FN int get_value_int(const char_t* value, int def) 3292 { 3293 if (!value) return def; 3294 3295 #ifdef PUGIXML_WCHAR_MODE 3296 return static_cast<int>(wcstol(value, 0, 10)); 3297 #else 3298 return static_cast<int>(strtol(value, 0, 10)); 3299 #endif 3300 } 3301 3302 PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def) 3303 { 3304 if (!value) return def; 3305 3306 #ifdef 
PUGIXML_WCHAR_MODE 3307 return static_cast<unsigned int>(wcstoul(value, 0, 10)); 3308 #else 3309 return static_cast<unsigned int>(strtoul(value, 0, 10)); 3310 #endif 3311 } 3312 3313 PUGI__FN double get_value_double(const char_t* value, double def) 3314 { 3315 if (!value) return def; 3316 4339 template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos) 4340 { 4341 U result = 0; 4342 const char_t* s = value; 4343 4344 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4345 s++; 4346 4347 bool negative = (*s == '-'); 4348 4349 s += (*s == '+' || *s == '-'); 4350 4351 bool overflow = false; 4352 4353 if (s[0] == '0' && (s[1] | ' ') == 'x') 4354 { 4355 s += 2; 4356 4357 // since overflow detection relies on length of the sequence skip leading zeros 4358 while (*s == '0') 4359 s++; 4360 4361 const char_t* start = s; 4362 4363 for (;;) 4364 { 4365 if (static_cast<unsigned>(*s - '0') < 10) 4366 result = result * 16 + (*s - '0'); 4367 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4368 result = result * 16 + ((*s | ' ') - 'a' + 10); 4369 else 4370 break; 4371 4372 s++; 4373 } 4374 4375 size_t digits = static_cast<size_t>(s - start); 4376 4377 overflow = digits > sizeof(U) * 2; 4378 } 4379 else 4380 { 4381 // since overflow detection relies on length of the sequence skip leading zeros 4382 while (*s == '0') 4383 s++; 4384 4385 const char_t* start = s; 4386 4387 for (;;) 4388 { 4389 if (static_cast<unsigned>(*s - '0') < 10) 4390 result = result * 10 + (*s - '0'); 4391 else 4392 break; 4393 4394 s++; 4395 } 4396 4397 size_t digits = static_cast<size_t>(s - start); 4398 4399 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); 4400 4401 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4402 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? 
'4' : '6'; 4403 const size_t high_bit = sizeof(U) * 8 - 1; 4404 4405 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4406 } 4407 4408 if (negative) 4409 return (overflow || result > minneg) ? 0 - minneg : 0 - result; 4410 else 4411 return (overflow || result > maxpos) ? maxpos : result; 4412 } 4413 4414 PUGI__FN int get_value_int(const char_t* value) 4415 { 4416 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX); 4417 } 4418 4419 PUGI__FN unsigned int get_value_uint(const char_t* value) 4420 { 4421 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4422 } 4423 4424 PUGI__FN double get_value_double(const char_t* value) 4425 { 3317 4426 #ifdef PUGIXML_WCHAR_MODE 3318 4427 return wcstod(value, 0); … … 3322 4431 } 3323 4432 3324 PUGI__FN float get_value_float(const char_t* value, float def) 3325 { 3326 if (!value) return def; 3327 4433 PUGI__FN float get_value_float(const char_t* value) 4434 { 3328 4435 #ifdef PUGIXML_WCHAR_MODE 3329 4436 return static_cast<float>(wcstod(value, 0)); … … 3333 4440 } 3334 4441 3335 PUGI__FN bool get_value_bool(const char_t* value, bool def) 3336 { 3337 if (!value) return def; 3338 4442 PUGI__FN bool get_value_bool(const char_t* value) 4443 { 3339 4444 // only look at first char 3340 4445 char_t first = *value; … … 3344 4449 } 3345 4450 4451 #ifdef PUGIXML_HAS_LONG_LONG 4452 PUGI__FN long long get_value_llong(const char_t* value) 4453 { 4454 return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4455 } 4456 4457 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4458 { 4459 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4460 } 4461 #endif 4462 4463 template <typename U> 4464 PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4465 { 4466 char_t* result = end - 1; 4467 U rest = 
negative ? 0 - value : value; 4468 4469 do 4470 { 4471 *result-- = static_cast<char_t>('0' + (rest % 10)); 4472 rest /= 10; 4473 } 4474 while (rest); 4475 4476 assert(result >= begin); 4477 (void)begin; 4478 4479 *result = '-'; 4480 4481 return result + !negative; 4482 } 4483 3346 4484 // set value with conversion functions 3347 PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128]) 4485 template <typename String, typename Header> 4486 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 3348 4487 { 3349 4488 #ifdef PUGIXML_WCHAR_MODE 3350 4489 char_t wbuf[128]; 3351 impl::widen_ascii(wbuf, buf); 3352 3353 return strcpy_insitu(dest, header, header_mask, wbuf); 4490 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4491 4492 size_t offset = 0; 4493 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4494 4495 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 3354 4496 #else 3355 return strcpy_insitu(dest, header, header_mask, buf );4497 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 3356 4498 #endif 3357 4499 } 3358 4500 3359 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value) 4501 template <typename String, typename Header> 4502 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) 4503 { 4504 char_t buf[64]; 4505 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4506 char_t* begin = integer_to_string<unsigned int>(buf, end, value, value < 0); 4507 4508 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4509 } 4510 4511 template <typename String, typename Header> 4512 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) 4513 { 4514 char_t buf[64]; 4515 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4516 char_t* begin = integer_to_string<unsigned int>(buf, end, value, 
false); 4517 4518 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4519 } 4520 4521 template <typename String, typename Header> 4522 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) 3360 4523 { 3361 4524 char buf[128]; 3362 sprintf(buf, "%d", value); 4525 sprintf(buf, "%.9g", value); 4526 4527 return set_value_ascii(dest, header, header_mask, buf); 4528 } 4529 4530 template <typename String, typename Header> 4531 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) 4532 { 4533 char buf[128]; 4534 sprintf(buf, "%.17g", value); 4535 4536 return set_value_ascii(dest, header, header_mask, buf); 4537 } 3363 4538 3364 return set_value_buffer(dest, header, header_mask, buf); 3365 } 3366 3367 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value) 3368 { 3369 char buf[128]; 3370 sprintf(buf, "%u", value); 3371 3372 return set_value_buffer(dest, header, header_mask, buf); 3373 } 3374 3375 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value) 3376 { 3377 char buf[128]; 3378 sprintf(buf, "%g", value); 3379 3380 return set_value_buffer(dest, header, header_mask, buf); 3381 } 3382 3383 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value) 3384 { 3385 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 4539 template <typename String, typename Header> 4540 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) 4541 { 4542 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); 4543 } 4544 4545 #ifdef PUGIXML_HAS_LONG_LONG 4546 template <typename String, typename Header> 4547 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) 4548 { 4549 char_t buf[64]; 4550 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4551 char_t* begin = integer_to_string<unsigned long long>(buf, end, value, value < 0); 4552 4553 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4554 } 4555 4556 template <typename String, typename Header> 4557 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) 4558 { 4559 char_t buf[64]; 4560 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4561 char_t* begin = integer_to_string<unsigned long long>(buf, end, value, false); 4562 4563 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4564 } 4565 #endif 4566 4567 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4568 { 4569 // check input buffer 4570 if (!contents && size) return make_parse_result(status_io_error); 4571 4572 // get actual encoding 4573 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4574 4575 // get private buffer 4576 char_t* buffer = 0; 4577 size_t length = 0; 4578 4579 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4580 4581 // delete original buffer if we performed a conversion 4582 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4583 4584 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4585 if (own || buffer != contents) *out_buffer = buffer; 4586 4587 // store buffer for offset_debug 4588 doc->buffer = buffer; 4589 4590 // parse 4591 
xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4592 4593 // remember encoding 4594 res.encoding = buffer_encoding; 4595 4596 return res; 3386 4597 } 3387 4598 … … 3396 4607 length_type length = _ftelli64(file); 3397 4608 _fseeki64(file, 0, SEEK_SET); 3398 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)4609 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 3399 4610 // there are 64-bit versions of fseek/ftell, let's use them 3400 4611 typedef off64_t length_type; … … 3426 4637 } 3427 4638 3428 PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding) 4639 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size 4640 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4641 { 4642 // We only need to zero-terminate if encoding conversion does not do it for us 4643 #ifdef PUGIXML_WCHAR_MODE 4644 xml_encoding wchar_encoding = get_wchar_encoding(); 4645 4646 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4647 { 4648 size_t length = size / sizeof(char_t); 4649 4650 static_cast<char_t*>(buffer)[length] = 0; 4651 return (length + 1) * sizeof(char_t); 4652 } 4653 #else 4654 if (encoding == encoding_utf8) 4655 { 4656 static_cast<char*>(buffer)[size] = 0; 4657 return size + 1; 4658 } 4659 #endif 4660 4661 return size; 4662 } 4663 4664 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 3429 4665 { 3430 4666 if (!file) return make_parse_result(status_file_not_found); … … 3433 4669 size_t size = 0; 3434 4670 xml_parse_status size_status = get_file_size(file, size); 3435 3436 if (size_status != status_ok) 3437 { 3438 fclose(file); 3439 return make_parse_result(size_status); 3440 } 4671 if 
(size_status != status_ok) return make_parse_result(size_status); 3441 4672 4673 size_t max_suffix_size = sizeof(char_t); 4674 3442 4675 // allocate buffer for the whole file 3443 char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1)); 3444 3445 if (!contents) 3446 { 3447 fclose(file); 3448 return make_parse_result(status_out_of_memory); 3449 } 4676 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4677 if (!contents) return make_parse_result(status_out_of_memory); 3450 4678 3451 4679 // read file in memory 3452 4680 size_t read_size = fread(contents, 1, size, file); 3453 fclose(file);3454 4681 3455 4682 if (read_size != size) … … 3458 4685 return make_parse_result(status_io_error); 3459 4686 } 3460 3461 return doc.load_buffer_inplace_own(contents, size, options, encoding); 4687 4688 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4689 4690 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 3462 4691 } 3463 4692 … … 3468 4697 { 3469 4698 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4699 if (!memory) return 0; 3470 4700 3471 4701 return new (memory) xml_stream_chunk(); 3472 4702 } 3473 4703 3474 static void destroy(void* ptr) 3475 { 3476 xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr); 3477 4704 static void destroy(xml_stream_chunk* chunk) 4705 { 3478 4706 // free chunk chain 3479 4707 while (chunk) 3480 4708 { 3481 xml_stream_chunk* next = chunk->next; 4709 xml_stream_chunk* next_ = chunk->next; 4710 3482 4711 xml_memory::deallocate(chunk); 3483 chunk = next; 4712 4713 chunk = next_; 3484 4714 } 3485 4715 } … … 3497 4727 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 3498 4728 { 3499 buffer_holderchunks(0, xml_stream_chunk<T>::destroy);4729 
auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 3500 4730 3501 4731 // read file to a chunk list … … 3525 4755 } 3526 4756 4757 size_t max_suffix_size = sizeof(char_t); 4758 3527 4759 // copy chunk list to a contiguous buffer 3528 char* buffer = static_cast<char*>(xml_memory::allocate(total ));4760 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 3529 4761 if (!buffer) return status_out_of_memory; 3530 4762 3531 4763 char* write = buffer; 3532 4764 3533 for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)4765 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 3534 4766 { 3535 4767 assert(write + chunk->size <= buffer + total); … … 3562 4794 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 3563 4795 4796 size_t max_suffix_size = sizeof(char_t); 4797 3564 4798 // read stream data into memory (guard against stream exceptions with buffer holder) 3565 buffer_holder buffer(xml_memory::allocate((read_length > 0 ? 
read_length : 1) * sizeof(T)), xml_memory::deallocate);4799 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 3566 4800 if (!buffer.data) return status_out_of_memory; 3567 4801 … … 3574 4808 size_t actual_length = static_cast<size_t>(stream.gcount()); 3575 4809 assert(actual_length <= read_length); 3576 4810 3577 4811 *out_buffer = buffer.release(); 3578 4812 *out_size = actual_length * sizeof(T); … … 3581 4815 } 3582 4816 3583 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document & doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)4817 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 3584 4818 { 3585 4819 void* buffer = 0; 3586 4820 size_t size = 0; 4821 xml_parse_status status = status_ok; 4822 4823 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4824 if (stream.fail()) return make_parse_result(status_io_error); 3587 4825 3588 4826 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 3589 xml_parse_status status = (stream.tellg() < 0) ? 
load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size); 4827 if (stream.tellg() < 0) 4828 { 4829 stream.clear(); // clear error flags that could be set by a failing tellg 4830 status = load_stream_data_noseek(stream, &buffer, &size); 4831 } 4832 else 4833 status = load_stream_data_seek(stream, &buffer, &size); 4834 3590 4835 if (status != status_ok) return make_parse_result(status); 3591 4836 3592 return doc.load_buffer_inplace_own(buffer, size, options, encoding); 4837 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4838 4839 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 3593 4840 } 3594 4841 #endif 3595 4842 3596 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))4843 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) 3597 4844 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 3598 4845 { … … 3605 4852 3606 4853 // first pass: get length in utf8 characters 3607 size_t length = wcslen(str);4854 size_t length = strlength_wide(str); 3608 4855 size_t size = as_utf8_begin(str, length); 3609 4856 … … 3615 4862 as_utf8_end(result, size, str, length); 3616 4863 4864 // zero-terminate 4865 result[size] = 0; 4866 3617 4867 return result; 3618 4868 } … … 3645 4895 doc.save(writer, indent, flags, encoding); 3646 4896 3647 int result = ferror(file); 3648 3649 fclose(file); 3650 3651 return result == 0; 3652 } 4897 return ferror(file) == 0; 4898 } 4899 4900 struct name_null_sentry 4901 { 4902 xml_node_struct* node; 4903 char_t* name; 4904 4905 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 4906 { 4907 node->name = 0; 4908 } 4909 4910 ~name_null_sentry() 4911 { 4912 node->name = name; 4913 } 4914 }; 3653 
4915 PUGI__NS_END 3654 4916 … … 3778 5040 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 3779 5041 { 3780 return (_attr && _attr->value) ? _attr->value : def;5042 return (_attr && _attr->value) ? _attr->value + 0 : def; 3781 5043 } 3782 5044 3783 5045 PUGI__FN int xml_attribute::as_int(int def) const 3784 5046 { 3785 return impl::get_value_int(_attr ? _attr->value : 0, def);5047 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; 3786 5048 } 3787 5049 3788 5050 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 3789 5051 { 3790 return impl::get_value_uint(_attr ? _attr->value : 0, def);5052 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 3791 5053 } 3792 5054 3793 5055 PUGI__FN double xml_attribute::as_double(double def) const 3794 5056 { 3795 return impl::get_value_double(_attr ? _attr->value : 0, def);5057 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 3796 5058 } 3797 5059 3798 5060 PUGI__FN float xml_attribute::as_float(float def) const 3799 5061 { 3800 return impl::get_value_float(_attr ? _attr->value : 0, def);5062 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 3801 5063 } 3802 5064 3803 5065 PUGI__FN bool xml_attribute::as_bool(bool def) const 3804 5066 { 3805 return impl::get_value_bool(_attr ? _attr->value : 0, def); 3806 } 5067 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5068 } 5069 5070 #ifdef PUGIXML_HAS_LONG_LONG 5071 PUGI__FN long long xml_attribute::as_llong(long long def) const 5072 { 5073 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5074 } 5075 5076 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5077 { 5078 return (_attr && _attr->value) ? 
impl::get_value_ullong(_attr->value) : def; 5079 } 5080 #endif 3807 5081 3808 5082 PUGI__FN bool xml_attribute::empty() const … … 3813 5087 PUGI__FN const char_t* xml_attribute::name() const 3814 5088 { 3815 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");5089 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 3816 5090 } 3817 5091 3818 5092 PUGI__FN const char_t* xml_attribute::value() const 3819 5093 { 3820 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");5094 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); 3821 5095 } 3822 5096 … … 3855 5129 } 3856 5130 3857 PUGI__FN xml_attribute& xml_attribute::operator=( boolrhs)5131 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 3858 5132 { 3859 5133 set_value(rhs); … … 3861 5135 } 3862 5136 5137 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5138 { 5139 set_value(rhs); 5140 return *this; 5141 } 5142 5143 #ifdef PUGIXML_HAS_LONG_LONG 5144 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5145 { 5146 set_value(rhs); 5147 return *this; 5148 } 5149 5150 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5151 { 5152 set_value(rhs); 5153 return *this; 5154 } 5155 #endif 5156 3863 5157 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) 3864 5158 { 3865 5159 if (!_attr) return false; 3866 5160 3867 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs );5161 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 3868 5162 } 3869 5163 … … 3872 5166 if (!_attr) return false; 3873 5167 3874 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs );5168 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 3875 5169 } 3876 5170 … … 3896 5190 } 3897 5191 5192 
PUGI__FN bool xml_attribute::set_value(float rhs) 5193 { 5194 if (!_attr) return false; 5195 5196 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5197 } 5198 3898 5199 PUGI__FN bool xml_attribute::set_value(bool rhs) 3899 5200 { … … 3902 5203 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 3903 5204 } 5205 5206 #ifdef PUGIXML_HAS_LONG_LONG 5207 PUGI__FN bool xml_attribute::set_value(long long rhs) 5208 { 5209 if (!_attr) return false; 5210 5211 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5212 } 5213 5214 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5215 { 5216 if (!_attr) return false; 5217 5218 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5219 } 5220 #endif 3904 5221 3905 5222 #ifdef __BORLANDC__ … … 3939 5256 PUGI__FN xml_node::iterator xml_node::begin() const 3940 5257 { 3941 return iterator(_root ? _root->first_child : 0, _root);5258 return iterator(_root ? _root->first_child + 0 : 0, _root); 3942 5259 } 3943 5260 … … 3949 5266 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 3950 5267 { 3951 return attribute_iterator(_root ? _root->first_attribute : 0, _root);5268 return attribute_iterator(_root ? 
_root->first_attribute + 0 : 0, _root); 3952 5269 } 3953 5270 … … 3964 5281 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 3965 5282 { 3966 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_) , name_), xml_named_node_iterator());5283 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 3967 5284 } 3968 5285 … … 4009 5326 PUGI__FN const char_t* xml_node::name() const 4010 5327 { 4011 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");5328 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 4012 5329 } 4013 5330 4014 5331 PUGI__FN xml_node_type xml_node::type() const 4015 5332 { 4016 return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;5333 return _root ? PUGI__NODETYPE(_root) : node_null; 4017 5334 } 4018 5335 4019 5336 PUGI__FN const char_t* xml_node::value() const 4020 5337 { 4021 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");5338 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT(""); 4022 5339 } 4023 5340 … … 4055 5372 PUGI__FN xml_node xml_node::next_sibling() const 4056 5373 { 4057 if (!_root) return xml_node(); 4058 4059 if (_root->next_sibling) return xml_node(_root->next_sibling); 4060 else return xml_node(); 5374 return _root ? 
xml_node(_root->next_sibling) : xml_node(); 4061 5375 } 4062 5376 … … 4071 5385 } 4072 5386 5387 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5388 { 5389 xml_attribute_struct* hint = hint_._attr; 5390 5391 // if hint is not an attribute of node, behavior is not defined 5392 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5393 5394 if (!_root) return xml_attribute(); 5395 5396 // optimistically search from hint up until the end 5397 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5398 if (i->name && impl::strequal(name_, i->name)) 5399 { 5400 // update hint to maximize efficiency of searching for consecutive attributes 5401 hint_._attr = i->next_attribute; 5402 5403 return xml_attribute(i); 5404 } 5405 5406 // wrap around and search from the first attribute until the hint 5407 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5408 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5409 if (j->name && impl::strequal(name_, j->name)) 5410 { 5411 // update hint to maximize efficiency of searching for consecutive attributes 5412 hint_._attr = j->next_attribute; 5413 5414 return xml_attribute(j); 5415 } 5416 5417 return xml_attribute(); 5418 } 5419 4073 5420 PUGI__FN xml_node xml_node::previous_sibling() const 4074 5421 { … … 4086 5433 PUGI__FN xml_node xml_node::root() const 4087 5434 { 4088 if (!_root) return xml_node(); 4089 4090 impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask); 4091 4092 return xml_node(static_cast<impl::xml_document_struct*>(page->allocator)); 5435 return _root ? 
xml_node(&impl::get_document(_root)) : xml_node(); 4093 5436 } 4094 5437 … … 4103 5446 4104 5447 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 4105 if (i ->value && impl::is_text_node(i))5448 if (impl::is_text_node(i) && i->value) 4106 5449 return i->value; 4107 5450 … … 4136 5479 PUGI__FN bool xml_node::set_name(const char_t* rhs) 4137 5480 { 4138 switch (type()) 4139 { 4140 case node_pi: 4141 case node_declaration: 4142 case node_element: 4143 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs); 4144 4145 default: 5481 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5482 5483 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 4146 5484 return false; 4147 } 5485 5486 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 4148 5487 } 4149 5488 4150 5489 PUGI__FN bool xml_node::set_value(const char_t* rhs) 4151 5490 { 4152 switch (type()) 4153 { 4154 case node_pi: 4155 case node_cdata: 4156 case node_pcdata: 4157 case node_comment: 4158 case node_doctype: 4159 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs); 4160 4161 default: 5491 xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; 5492 5493 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 4162 5494 return false; 4163 } 5495 5496 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 4164 5497 } 4165 5498 4166 5499 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 4167 5500 { 4168 if ( type() != node_element && type() != node_declaration) return xml_attribute();5501 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 4169 5502 4170 xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root))); 5503 impl::xml_allocator& alloc = impl::get_allocator(_root); 5504 if (!alloc.reserve()) return xml_attribute(); 5505 5506 xml_attribute a(impl::allocate_attribute(alloc)); 5507 if (!a) return xml_attribute(); 5508 5509 impl::append_attribute(a._attr, _root); 5510 4171 5511 a.set_name(name_); 4172 5512 … … 4176 5516 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 4177 5517 { 4178 if ( type() != node_element && type() != node_declaration) return xml_attribute();5518 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 4179 5519 4180 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); 5520 impl::xml_allocator& alloc = impl::get_allocator(_root); 5521 if (!alloc.reserve()) return xml_attribute(); 5522 5523 xml_attribute a(impl::allocate_attribute(alloc)); 4181 5524 if (!a) return xml_attribute(); 4182 5525 5526 impl::prepend_attribute(a._attr, _root); 5527 4183 5528 a.set_name(name_); 5529 5530 return a; 5531 } 5532 5533 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5534 { 5535 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5536 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 4184 5537 4185 xml_attribute_struct* head 
= _root->first_attribute; 4186 4187 if (head) 4188 { 4189 a._attr->prev_attribute_c = head->prev_attribute_c; 4190 head->prev_attribute_c = a._attr; 4191 } 4192 else 4193 a._attr->prev_attribute_c = a._attr; 5538 impl::xml_allocator& alloc = impl::get_allocator(_root); 5539 if (!alloc.reserve()) return xml_attribute(); 5540 5541 xml_attribute a(impl::allocate_attribute(alloc)); 5542 if (!a) return xml_attribute(); 5543 5544 impl::insert_attribute_after(a._attr, attr._attr, _root); 5545 5546 a.set_name(name_); 5547 5548 return a; 5549 } 5550 5551 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5552 { 5553 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5554 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 4194 5555 4195 a._attr->next_attribute = head; 4196 _root->first_attribute = a._attr; 4197 5556 impl::xml_allocator& alloc = impl::get_allocator(_root); 5557 if (!alloc.reserve()) return xml_attribute(); 5558 5559 xml_attribute a(impl::allocate_attribute(alloc)); 5560 if (!a) return xml_attribute(); 5561 5562 impl::insert_attribute_before(a._attr, attr._attr, _root); 5563 5564 a.set_name(name_); 5565 4198 5566 return a; 4199 5567 } 4200 5568 4201 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 4202 { 4203 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); 5569 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5570 { 5571 if (!proto) return xml_attribute(); 5572 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5573 5574 impl::xml_allocator& alloc = impl::get_allocator(_root); 5575 if (!alloc.reserve()) return xml_attribute(); 5576 5577 xml_attribute a(impl::allocate_attribute(alloc)); 5578 if (!a) return xml_attribute(); 5579 5580 impl::append_attribute(a._attr, _root); 5581 impl::node_copy_attribute(a._attr, 
proto._attr); 5582 5583 return a; 5584 } 5585 5586 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5587 { 5588 if (!proto) return xml_attribute(); 5589 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5590 5591 impl::xml_allocator& alloc = impl::get_allocator(_root); 5592 if (!alloc.reserve()) return xml_attribute(); 5593 5594 xml_attribute a(impl::allocate_attribute(alloc)); 5595 if (!a) return xml_attribute(); 5596 5597 impl::prepend_attribute(a._attr, _root); 5598 impl::node_copy_attribute(a._attr, proto._attr); 5599 5600 return a; 5601 } 5602 5603 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5604 { 5605 if (!proto) return xml_attribute(); 5606 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5607 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5608 5609 impl::xml_allocator& alloc = impl::get_allocator(_root); 5610 if (!alloc.reserve()) return xml_attribute(); 5611 5612 xml_attribute a(impl::allocate_attribute(alloc)); 5613 if (!a) return xml_attribute(); 5614 5615 impl::insert_attribute_after(a._attr, attr._attr, _root); 5616 impl::node_copy_attribute(a._attr, proto._attr); 5617 5618 return a; 5619 } 5620 5621 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5622 { 5623 if (!proto) return xml_attribute(); 5624 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5625 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5626 5627 impl::xml_allocator& alloc = impl::get_allocator(_root); 5628 if (!alloc.reserve()) return xml_attribute(); 5629 5630 xml_attribute a(impl::allocate_attribute(alloc)); 5631 if (!a) return xml_attribute(); 5632 5633 impl::insert_attribute_before(a._attr, attr._attr, _root); 5634 impl::node_copy_attribute(a._attr, proto._attr); 5635 5636 return a; 5637 } 5638 5639 PUGI__FN xml_node 
xml_node::append_child(xml_node_type type_) 5640 { 5641 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 4204 5642 4205 // check that attribute belongs to *this 4206 xml_attribute_struct* cur = attr._attr; 4207 4208 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; 4209 4210 if (cur != _root->first_attribute) return xml_attribute(); 4211 4212 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root))); 4213 if (!a) return xml_attribute(); 4214 4215 a.set_name(name_); 4216 4217 if (attr._attr->prev_attribute_c->next_attribute) 4218 attr._attr->prev_attribute_c->next_attribute = a._attr; 4219 else 4220 _root->first_attribute = a._attr; 5643 impl::xml_allocator& alloc = impl::get_allocator(_root); 5644 if (!alloc.reserve()) return xml_node(); 5645 5646 xml_node n(impl::allocate_node(alloc, type_)); 5647 if (!n) return xml_node(); 5648 5649 impl::append_node(n._root, _root); 5650 5651 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5652 5653 return n; 5654 } 5655 5656 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5657 { 5658 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5659 5660 impl::xml_allocator& alloc = impl::get_allocator(_root); 5661 if (!alloc.reserve()) return xml_node(); 4221 5662 4222 a._attr->prev_attribute_c = attr._attr->prev_attribute_c; 4223 a._attr->next_attribute = attr._attr; 4224 attr._attr->prev_attribute_c = a._attr; 4225 4226 return a; 4227 } 4228 4229 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 4230 { 4231 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute(); 4232 4233 // check that attribute belongs to *this 4234 xml_attribute_struct* cur = attr._attr; 4235 4236 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c; 4237 4238 if (cur != _root->first_attribute) return xml_attribute(); 4239 4240 xml_attribute 
a(impl::allocate_attribute(impl::get_allocator(_root))); 4241 if (!a) return xml_attribute(); 4242 4243 a.set_name(name_); 4244 4245 if (attr._attr->next_attribute) 4246 attr._attr->next_attribute->prev_attribute_c = a._attr; 4247 else 4248 _root->first_attribute->prev_attribute_c = a._attr; 4249 4250 a._attr->next_attribute = attr._attr->next_attribute; 4251 a._attr->prev_attribute_c = attr._attr; 4252 attr._attr->next_attribute = a._attr; 4253 4254 return a; 4255 } 4256 4257 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 4258 { 4259 if (!proto) return xml_attribute(); 4260 4261 xml_attribute result = append_attribute(proto.name()); 4262 result.set_value(proto.value()); 4263 4264 return result; 4265 } 4266 4267 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 4268 { 4269 if (!proto) return xml_attribute(); 4270 4271 xml_attribute result = prepend_attribute(proto.name()); 4272 result.set_value(proto.value()); 4273 4274 return result; 4275 } 4276 4277 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 4278 { 4279 if (!proto) return xml_attribute(); 4280 4281 xml_attribute result = insert_attribute_after(proto.name(), attr); 4282 result.set_value(proto.value()); 4283 4284 return result; 4285 } 4286 4287 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 4288 { 4289 if (!proto) return xml_attribute(); 4290 4291 xml_attribute result = insert_attribute_before(proto.name(), attr); 4292 result.set_value(proto.value()); 4293 4294 return result; 4295 } 4296 4297 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 4298 { 4299 if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); 4300 4301 xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_)); 4302 4303 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 4304 4305 return n; 4306 } 4307 4308 PUGI__FN xml_node 
xml_node::prepend_child(xml_node_type type_) 4309 { 4310 if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); 4311 4312 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); 5663 xml_node n(impl::allocate_node(alloc, type_)); 4313 5664 if (!n) return xml_node(); 4314 5665 4315 n._root->parent = _root; 4316 4317 xml_node_struct* head = _root->first_child; 4318 4319 if (head) 4320 { 4321 n._root->prev_sibling_c = head->prev_sibling_c; 4322 head->prev_sibling_c = n._root; 4323 } 4324 else 4325 n._root->prev_sibling_c = n._root; 4326 4327 n._root->next_sibling = head; 4328 _root->first_child = n._root; 5666 impl::prepend_node(n._root, _root); 4329 5667 4330 5668 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); … … 4335 5673 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 4336 5674 { 4337 if (!impl::allow_insert_child(t his->type(), type_)) return xml_node();5675 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 4338 5676 if (!node._root || node._root->parent != _root) return xml_node(); 5677 5678 impl::xml_allocator& alloc = impl::get_allocator(_root); 5679 if (!alloc.reserve()) return xml_node(); 4339 5680 4340 xml_node n(impl::allocate_node( impl::get_allocator(_root), type_));5681 xml_node n(impl::allocate_node(alloc, type_)); 4341 5682 if (!n) return xml_node(); 4342 5683 4343 n._root->parent = _root; 5684 impl::insert_node_before(n._root, node._root); 5685 5686 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5687 5688 return n; 5689 } 5690 5691 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5692 { 5693 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5694 if (!node._root || node._root->parent != _root) return xml_node(); 5695 5696 impl::xml_allocator& alloc = impl::get_allocator(_root); 5697 if (!alloc.reserve()) return xml_node(); 5698 5699 xml_node n(impl::allocate_node(alloc, type_)); 
5700 if (!n) return xml_node(); 5701 5702 impl::insert_node_after(n._root, node._root); 5703 5704 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5705 5706 return n; 5707 } 5708 5709 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5710 { 5711 xml_node result = append_child(node_element); 5712 5713 result.set_name(name_); 5714 5715 return result; 5716 } 5717 5718 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5719 { 5720 xml_node result = prepend_child(node_element); 5721 5722 result.set_name(name_); 5723 5724 return result; 5725 } 5726 5727 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5728 { 5729 xml_node result = insert_child_after(node_element, node); 5730 5731 result.set_name(name_); 5732 5733 return result; 5734 } 5735 5736 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5737 { 5738 xml_node result = insert_child_before(node_element, node); 5739 5740 result.set_name(name_); 5741 5742 return result; 5743 } 5744 5745 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5746 { 5747 xml_node_type type_ = proto.type(); 5748 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5749 5750 impl::xml_allocator& alloc = impl::get_allocator(_root); 5751 if (!alloc.reserve()) return xml_node(); 5752 5753 xml_node n(impl::allocate_node(alloc, type_)); 5754 if (!n) return xml_node(); 5755 5756 impl::append_node(n._root, _root); 5757 impl::node_copy_tree(n._root, proto._root); 5758 5759 return n; 5760 } 5761 5762 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) 5763 { 5764 xml_node_type type_ = proto.type(); 5765 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5766 5767 impl::xml_allocator& alloc = impl::get_allocator(_root); 5768 if (!alloc.reserve()) return xml_node(); 5769 5770 xml_node n(impl::allocate_node(alloc, type_)); 5771 if (!n) return xml_node(); 5772 5773 
impl::prepend_node(n._root, _root); 5774 impl::node_copy_tree(n._root, proto._root); 5775 5776 return n; 5777 } 5778 5779 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5780 { 5781 xml_node_type type_ = proto.type(); 5782 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5783 if (!node._root || node._root->parent != _root) return xml_node(); 5784 5785 impl::xml_allocator& alloc = impl::get_allocator(_root); 5786 if (!alloc.reserve()) return xml_node(); 5787 5788 xml_node n(impl::allocate_node(alloc, type_)); 5789 if (!n) return xml_node(); 5790 5791 impl::insert_node_after(n._root, node._root); 5792 impl::node_copy_tree(n._root, proto._root); 5793 5794 return n; 5795 } 5796 5797 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5798 { 5799 xml_node_type type_ = proto.type(); 5800 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5801 if (!node._root || node._root->parent != _root) return xml_node(); 5802 5803 impl::xml_allocator& alloc = impl::get_allocator(_root); 5804 if (!alloc.reserve()) return xml_node(); 5805 5806 xml_node n(impl::allocate_node(alloc, type_)); 5807 if (!n) return xml_node(); 5808 5809 impl::insert_node_before(n._root, node._root); 5810 impl::node_copy_tree(n._root, proto._root); 5811 5812 return n; 5813 } 5814 5815 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 5816 { 5817 if (!impl::allow_move(*this, moved)) return xml_node(); 5818 5819 impl::xml_allocator& alloc = impl::get_allocator(_root); 5820 if (!alloc.reserve()) return xml_node(); 5821 5822 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5823 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5824 5825 impl::remove_node(moved._root); 5826 impl::append_node(moved._root, _root); 5827 5828 return moved; 5829 } 5830 5831 PUGI__FN xml_node 
xml_node::prepend_move(const xml_node& moved) 5832 { 5833 if (!impl::allow_move(*this, moved)) return xml_node(); 5834 5835 impl::xml_allocator& alloc = impl::get_allocator(_root); 5836 if (!alloc.reserve()) return xml_node(); 5837 5838 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5839 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5840 5841 impl::remove_node(moved._root); 5842 impl::prepend_node(moved._root, _root); 5843 5844 return moved; 5845 } 5846 5847 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 5848 { 5849 if (!impl::allow_move(*this, moved)) return xml_node(); 5850 if (!node._root || node._root->parent != _root) return xml_node(); 5851 if (moved._root == node._root) return xml_node(); 5852 5853 impl::xml_allocator& alloc = impl::get_allocator(_root); 5854 if (!alloc.reserve()) return xml_node(); 5855 5856 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5857 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5858 5859 impl::remove_node(moved._root); 5860 impl::insert_node_after(moved._root, node._root); 5861 5862 return moved; 5863 } 5864 5865 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 5866 { 5867 if (!impl::allow_move(*this, moved)) return xml_node(); 5868 if (!node._root || node._root->parent != _root) return xml_node(); 5869 if (moved._root == node._root) return xml_node(); 5870 5871 impl::xml_allocator& alloc = impl::get_allocator(_root); 5872 if (!alloc.reserve()) return xml_node(); 5873 5874 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5875 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5876 5877 
impl::remove_node(moved._root); 5878 impl::insert_node_before(moved._root, node._root); 5879 5880 return moved; 5881 } 5882 5883 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 5884 { 5885 return remove_attribute(attribute(name_)); 5886 } 5887 5888 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 5889 { 5890 if (!_root || !a._attr) return false; 5891 if (!impl::is_attribute_of(a._attr, _root)) return false; 5892 5893 impl::xml_allocator& alloc = impl::get_allocator(_root); 5894 if (!alloc.reserve()) return false; 5895 5896 impl::remove_attribute(a._attr, _root); 5897 impl::destroy_attribute(a._attr, alloc); 5898 5899 return true; 5900 } 5901 5902 PUGI__FN bool xml_node::remove_child(const char_t* name_) 5903 { 5904 return remove_child(child(name_)); 5905 } 5906 5907 PUGI__FN bool xml_node::remove_child(const xml_node& n) 5908 { 5909 if (!_root || !n._root || n._root->parent != _root) return false; 5910 5911 impl::xml_allocator& alloc = impl::get_allocator(_root); 5912 if (!alloc.reserve()) return false; 5913 5914 impl::remove_node(n._root); 5915 impl::destroy_node(n._root, alloc); 5916 5917 return true; 5918 } 5919 5920 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 5921 { 5922 // append_buffer is only valid for elements/documents 5923 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 5924 5925 // get document node 5926 impl::xml_document_struct* doc = &impl::get_document(_root); 5927 5928 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 5929 doc->header |= impl::xml_memory_page_contents_shared_mask; 4344 5930 4345 if (node._root->prev_sibling_c->next_sibling) 4346 node._root->prev_sibling_c->next_sibling = n._root; 4347 else 4348 _root->first_child = n._root; 4349 4350 n._root->prev_sibling_c = 
node._root->prev_sibling_c; 4351 n._root->next_sibling = node._root; 4352 node._root->prev_sibling_c = n._root; 4353 4354 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 4355 4356 return n; 4357 } 4358 4359 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 4360 { 4361 if (!impl::allow_insert_child(this->type(), type_)) return xml_node(); 4362 if (!node._root || node._root->parent != _root) return xml_node(); 4363 4364 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_)); 4365 if (!n) return xml_node(); 4366 4367 n._root->parent = _root; 4368 4369 if (node._root->next_sibling) 4370 node._root->next_sibling->prev_sibling_c = n._root; 4371 else 4372 _root->first_child->prev_sibling_c = n._root; 4373 4374 n._root->next_sibling = node._root->next_sibling; 4375 n._root->prev_sibling_c = node._root; 4376 node._root->next_sibling = n._root; 4377 4378 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 4379 4380 return n; 4381 } 4382 4383 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 4384 { 4385 xml_node result = append_child(node_element); 4386 4387 result.set_name(name_); 4388 4389 return result; 4390 } 4391 4392 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 4393 { 4394 xml_node result = prepend_child(node_element); 4395 4396 result.set_name(name_); 4397 4398 return result; 4399 } 4400 4401 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 4402 { 4403 xml_node result = insert_child_after(node_element, node); 4404 4405 result.set_name(name_); 4406 4407 return result; 4408 } 4409 4410 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 4411 { 4412 xml_node result = insert_child_before(node_element, node); 4413 4414 result.set_name(name_); 4415 4416 return result; 4417 } 4418 4419 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 4420 { 4421 xml_node result = 
append_child(proto.type()); 4422 4423 if (result) impl::recursive_copy_skip(result, proto, result); 4424 4425 return result; 4426 } 4427 4428 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) 4429 { 4430 xml_node result = prepend_child(proto.type()); 4431 4432 if (result) impl::recursive_copy_skip(result, proto, result); 4433 4434 return result; 4435 } 4436 4437 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 4438 { 4439 xml_node result = insert_child_after(proto.type(), node); 4440 4441 if (result) impl::recursive_copy_skip(result, proto, result); 4442 4443 return result; 4444 } 4445 4446 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 4447 { 4448 xml_node result = insert_child_before(proto.type(), node); 4449 4450 if (result) impl::recursive_copy_skip(result, proto, result); 4451 4452 return result; 4453 } 4454 4455 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 4456 { 4457 return remove_attribute(attribute(name_)); 4458 } 4459 4460 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 4461 { 4462 if (!_root || !a._attr) return false; 4463 4464 // check that attribute belongs to *this 4465 xml_attribute_struct* attr = a._attr; 4466 4467 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c; 4468 4469 if (attr != _root->first_attribute) return false; 4470 4471 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c; 4472 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c; 4473 4474 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute; 4475 else _root->first_attribute = a._attr->next_attribute; 4476 4477 impl::destroy_attribute(a._attr, impl::get_allocator(_root)); 4478 4479 return true; 4480 } 4481 4482 PUGI__FN bool xml_node::remove_child(const char_t* name_) 
4483 { 4484 return remove_child(child(name_)); 4485 } 4486 4487 PUGI__FN bool xml_node::remove_child(const xml_node& n) 4488 { 4489 if (!_root || !n._root || n._root->parent != _root) return false; 4490 4491 if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c; 4492 else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c; 4493 4494 if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling; 4495 else _root->first_child = n._root->next_sibling; 4496 4497 impl::destroy_node(n._root, impl::get_allocator(_root)); 4498 4499 return true; 5931 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 5932 impl::xml_memory_page* page = 0; 5933 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); 5934 (void)page; 5935 5936 if (!extra) return impl::make_parse_result(status_out_of_memory); 5937 5938 // add extra buffer to the list 5939 extra->buffer = 0; 5940 extra->next = doc->extra_buffers; 5941 doc->extra_buffers = extra; 5942 5943 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 5944 impl::name_null_sentry sentry(_root); 5945 5946 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 4500 5947 } 4501 5948 … … 4508 5955 { 4509 5956 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 4510 if ( impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))5957 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? 
a->value + 0 : PUGIXML_TEXT(""))) 4511 5958 return xml_node(i); 4512 5959 } … … 4521 5968 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 4522 5969 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 4523 if ( impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))5970 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 4524 5971 return xml_node(i); 4525 5972 … … 4530 5977 PUGI__FN string_t xml_node::path(char_t delimiter) const 4531 5978 { 4532 xml_node cursor = *this; // Make a copy. 4533 4534 string_t result = cursor.name(); 4535 4536 while (cursor.parent()) 4537 { 4538 cursor = cursor.parent(); 4539 4540 string_t temp = cursor.name(); 4541 temp += delimiter; 4542 temp += result; 4543 result.swap(temp); 4544 } 5979 if (!_root) return string_t(); 5980 5981 size_t offset = 0; 5982 5983 for (xml_node_struct* i = _root; i; i = i->parent) 5984 { 5985 offset += (i != _root); 5986 offset += i->name ? 
impl::strlength(i->name) : 0; 5987 } 5988 5989 string_t result; 5990 result.resize(offset); 5991 5992 for (xml_node_struct* j = _root; j; j = j->parent) 5993 { 5994 if (j != _root) 5995 result[--offset] = delimiter; 5996 5997 if (j->name && *j->name) 5998 { 5999 size_t length = impl::strlength(j->name); 6000 6001 offset -= length; 6002 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6003 } 6004 } 6005 6006 assert(offset == 0); 4545 6007 4546 6008 return result; … … 4659 6121 impl::xml_buffered_writer buffered_writer(writer, encoding); 4660 6122 4661 impl::node_output(buffered_writer, *this, indent, flags, depth); 6123 impl::node_output(buffered_writer, _root, indent, flags, depth); 6124 6125 buffered_writer.flush(); 4662 6126 } 4663 6127 … … 4680 6144 PUGI__FN ptrdiff_t xml_node::offset_debug() const 4681 6145 { 4682 xml_node_struct* r = root()._root; 4683 4684 if (!r) return -1; 4685 4686 const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer; 4687 4688 if (!buffer) return -1; 6146 if (!_root) return -1; 6147 6148 impl::xml_document_struct& doc = impl::get_document(_root); 6149 6150 // we can determine the offset reliably only if there is exactly once parse buffer 6151 if (!doc.buffer || doc.extra_buffers) return -1; 4689 6152 4690 6153 switch (type()) … … 4696 6159 case node_declaration: 4697 6160 case node_pi: 4698 return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;6161 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; 4699 6162 4700 6163 case node_pcdata: … … 4702 6165 case node_comment: 4703 6166 case node_doctype: 4704 return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;6167 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? 
_root->value - doc.buffer : -1; 4705 6168 4706 6169 default: … … 4771 6234 xml_node_struct* d = _data(); 4772 6235 4773 return (d && d->value) ? d->value : PUGIXML_TEXT("");6236 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 4774 6237 } 4775 6238 … … 4778 6241 xml_node_struct* d = _data(); 4779 6242 4780 return (d && d->value) ? d->value : def;6243 return (d && d->value) ? d->value + 0 : def; 4781 6244 } 4782 6245 … … 4785 6248 xml_node_struct* d = _data(); 4786 6249 4787 return impl::get_value_int(d ? d->value : 0, def);6250 return (d && d->value) ? impl::get_value_int(d->value) : def; 4788 6251 } 4789 6252 … … 4792 6255 xml_node_struct* d = _data(); 4793 6256 4794 return impl::get_value_uint(d ? d->value : 0, def);6257 return (d && d->value) ? impl::get_value_uint(d->value) : def; 4795 6258 } 4796 6259 … … 4799 6262 xml_node_struct* d = _data(); 4800 6263 4801 return impl::get_value_double(d ? d->value : 0, def);6264 return (d && d->value) ? impl::get_value_double(d->value) : def; 4802 6265 } 4803 6266 … … 4806 6269 xml_node_struct* d = _data(); 4807 6270 4808 return impl::get_value_float(d ? d->value : 0, def);6271 return (d && d->value) ? impl::get_value_float(d->value) : def; 4809 6272 } 4810 6273 … … 4813 6276 xml_node_struct* d = _data(); 4814 6277 4815 return impl::get_value_bool(d ? d->value : 0, def); 4816 } 6278 return (d && d->value) ? impl::get_value_bool(d->value) : def; 6279 } 6280 6281 #ifdef PUGIXML_HAS_LONG_LONG 6282 PUGI__FN long long xml_text::as_llong(long long def) const 6283 { 6284 xml_node_struct* d = _data(); 6285 6286 return (d && d->value) ? impl::get_value_llong(d->value) : def; 6287 } 6288 6289 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6290 { 6291 xml_node_struct* d = _data(); 6292 6293 return (d && d->value) ? 
impl::get_value_ullong(d->value) : def; 6294 } 6295 #endif 4817 6296 4818 6297 PUGI__FN bool xml_text::set(const char_t* rhs) … … 4820 6299 xml_node_struct* dn = _data_new(); 4821 6300 4822 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs ) : false;6301 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 4823 6302 } 4824 6303 … … 4837 6316 } 4838 6317 6318 PUGI__FN bool xml_text::set(float rhs) 6319 { 6320 xml_node_struct* dn = _data_new(); 6321 6322 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6323 } 6324 4839 6325 PUGI__FN bool xml_text::set(double rhs) 4840 6326 { … … 4850 6336 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 4851 6337 } 6338 6339 #ifdef PUGIXML_HAS_LONG_LONG 6340 PUGI__FN bool xml_text::set(long long rhs) 6341 { 6342 xml_node_struct* dn = _data_new(); 6343 6344 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6345 } 6346 6347 PUGI__FN bool xml_text::set(unsigned long long rhs) 6348 { 6349 xml_node_struct* dn = _data_new(); 6350 6351 return dn ? 
impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6352 } 6353 #endif 4852 6354 4853 6355 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) … … 4875 6377 } 4876 6378 4877 PUGI__FN xml_text& xml_text::operator=( boolrhs)6379 PUGI__FN xml_text& xml_text::operator=(float rhs) 4878 6380 { 4879 6381 set(rhs); 4880 6382 return *this; 4881 6383 } 6384 6385 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6386 { 6387 set(rhs); 6388 return *this; 6389 } 6390 6391 #ifdef PUGIXML_HAS_LONG_LONG 6392 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6393 { 6394 set(rhs); 6395 return *this; 6396 } 6397 6398 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6399 { 6400 set(rhs); 6401 return *this; 6402 } 6403 #endif 4882 6404 4883 6405 PUGI__FN xml_node xml_text::data() const … … 5024 6546 } 5025 6547 5026 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name) 6548 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6549 { 6550 } 6551 6552 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 5027 6553 { 5028 6554 } … … 5030 6556 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 5031 6557 { 5032 return _ node == rhs._node;6558 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 5033 6559 } 5034 6560 5035 6561 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 5036 6562 { 5037 return _ node != rhs._node;6563 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 5038 6564 } 5039 6565 5040 6566 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 5041 6567 { 5042 assert(_ node._root);5043 
return _ node;6568 assert(_wrap._root); 6569 return _wrap; 5044 6570 } 5045 6571 5046 6572 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 5047 6573 { 5048 assert(_ node._root);5049 return const_cast<xml_node*>(&_ node); // BCC32 workaround6574 assert(_wrap._root); 6575 return const_cast<xml_node*>(&_wrap); // BCC32 workaround 5050 6576 } 5051 6577 5052 6578 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() 5053 6579 { 5054 assert(_ node._root);5055 _ node = _node.next_sibling(_name);6580 assert(_wrap._root); 6581 _wrap = _wrap.next_sibling(_name); 5056 6582 return *this; 5057 6583 } … … 5061 6587 xml_named_node_iterator temp = *this; 5062 6588 ++*this; 6589 return temp; 6590 } 6591 6592 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() 6593 { 6594 if (_wrap._root) 6595 _wrap = _wrap.previous_sibling(_name); 6596 else 6597 { 6598 _wrap = _parent.last_child(); 6599 6600 if (!impl::strequal(_wrap.name(), _name)) 6601 _wrap = _wrap.previous_sibling(_name); 6602 } 6603 6604 return *this; 6605 } 6606 6607 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6608 { 6609 xml_named_node_iterator temp = *this; 6610 --*this; 5063 6611 return temp; 5064 6612 } … … 5096 6644 case status_end_element_mismatch: return "Start-end tags mismatch"; 5097 6645 6646 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6647 6648 case status_no_document_element: return "No document element found"; 6649 5098 6650 default: return "Unknown error"; 5099 6651 } … … 5126 6678 PUGI__FN void xml_document::create() 5127 6679 { 6680 assert(!_root); 6681 6682 #ifdef PUGIXML_COMPACT 6683 const size_t page_offset = sizeof(uint32_t); 6684 #else 6685 const size_t page_offset = 0; 6686 #endif 6687 5128 6688 // initialize sentinel page 5129 PUGI__STATIC_ASSERT( offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment 
<= sizeof(_memory));6689 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); 5130 6690 5131 6691 // align upwards to page boundary … … 5134 6694 // prepare page structure 5135 6695 impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory); 6696 assert(page); 5136 6697 5137 6698 page->busy_size = impl::xml_memory_page_size; 5138 6699 6700 // setup first page marker 6701 #ifdef PUGIXML_COMPACT 6702 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 6703 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 6704 *page->compact_page_marker = sizeof(impl::xml_memory_page); 6705 #endif 6706 5139 6707 // allocate new root 5140 _root = new ( page->data) impl::xml_document_struct(page);6708 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 5141 6709 _root->prev_sibling_c = _root; 5142 6710 5143 6711 // setup sentinel page 5144 6712 page->allocator = static_cast<impl::xml_document_struct*>(_root); 6713 6714 // verify the document allocation 6715 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 5145 6716 } 5146 6717 5147 6718 PUGI__FN void xml_document::destroy() 5148 6719 { 6720 assert(_root); 6721 5149 6722 // destroy static storage 5150 6723 if (_buffer) … … 5154 6727 } 5155 6728 6729 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 6730 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 6731 { 6732 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 6733 } 6734 5156 6735 // destroy dynamic storage, leave sentinel page (it's in 
static memory) 5157 if (_root) 5158 { 5159 impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask); 5160 assert(root_page && !root_page->prev && !root_page->memory); 5161 5162 // destroy all pages 5163 for (impl::xml_memory_page* page = root_page->next; page; ) 5164 { 5165 impl::xml_memory_page* next = page->next; 5166 5167 impl::xml_allocator::deallocate_page(page); 5168 5169 page = next; 5170 } 5171 5172 // cleanup root page 5173 root_page->allocator = 0; 5174 root_page->next = 0; 5175 root_page->busy_size = root_page->freed_size = 0; 5176 5177 _root = 0; 5178 } 6736 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); 6737 assert(root_page && !root_page->prev); 6738 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 6739 6740 for (impl::xml_memory_page* page = root_page->next; page; ) 6741 { 6742 impl::xml_memory_page* next = page->next; 6743 6744 impl::xml_allocator::deallocate_page(page); 6745 6746 page = next; 6747 } 6748 6749 #ifdef PUGIXML_COMPACT 6750 // destroy hash table 6751 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 6752 #endif 6753 6754 _root = 0; 5179 6755 } 5180 6756 … … 5184 6760 reset(); 5185 6761 5186 return impl::load_stream_impl( *this, stream, options, encoding);6762 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 5187 6763 } 5188 6764 … … 5191 6767 reset(); 5192 6768 5193 return impl::load_stream_impl( *this, stream, options, encoding_wchar);6769 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 5194 6770 } 5195 6771 #endif 5196 6772 5197 PUGI__FN xml_parse_result xml_document::load (const char_t* contents, unsigned int options)6773 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 5198 6774 { 
5199 6775 // Force native encoding (skip autodetection) … … 5207 6783 } 5208 6784 6785 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 6786 { 6787 return load_string(contents, options); 6788 } 6789 5209 6790 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) 5210 6791 { 5211 6792 reset(); 5212 6793 5213 FILE* file = fopen(path_, "rb"); 5214 5215 return impl::load_file_impl(*this, file, options, encoding); 6794 using impl::auto_deleter; // MSVC7 workaround 6795 auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose); 6796 6797 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 5216 6798 } 5217 6799 … … 5220 6802 reset(); 5221 6803 5222 FILE* file = impl::open_file_wide(path_, L"rb"); 5223 5224 return impl::load_file_impl(*this, file, options, encoding); 5225 } 5226 5227 PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own) 6804 using impl::auto_deleter; // MSVC7 workaround 6805 auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose); 6806 6807 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 6808 } 6809 6810 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 5228 6811 { 5229 6812 reset(); 5230 6813 5231 // check input buffer 5232 assert(contents || size == 0); 5233 5234 // get actual encoding 5235 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 5236 5237 // get private buffer 5238 char_t* buffer = 0; 5239 size_t length = 0; 5240 5241 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 
5242 5243 // delete original buffer if we performed a conversion 5244 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 5245 5246 // parse 5247 xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options); 5248 5249 // remember encoding 5250 res.encoding = buffer_encoding; 5251 5252 // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself 5253 if (own || buffer != contents) _buffer = buffer; 5254 5255 return res; 5256 } 5257 5258 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 5259 { 5260 return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false); 6814 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 5261 6815 } 5262 6816 5263 6817 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 5264 6818 { 5265 return load_buffer_impl(contents, size, options, encoding, true, false); 5266 } 5267 6819 reset(); 6820 6821 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 6822 } 6823 5268 6824 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) 5269 6825 { 5270 return load_buffer_impl(contents, size, options, encoding, true, true); 6826 reset(); 6827 6828 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 5271 6829 } 5272 6830 … … 5286 6844 } 5287 6845 5288 if (!(flags & format_no_declaration) && !impl::has_declaration( *this))5289 { 5290 buffered_writer.write (PUGIXML_TEXT("<?xml version=\"1.0\""));5291 if (encoding == 
encoding_latin1) buffered_writer.write (PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));6846 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 6847 { 6848 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 6849 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 5292 6850 buffered_writer.write('?', '>'); 5293 6851 if (!(flags & format_raw)) buffered_writer.write('\n'); 5294 6852 } 5295 6853 5296 impl::node_output(buffered_writer, *this, indent, flags, 0); 6854 impl::node_output(buffered_writer, _root, indent, flags, 0); 6855 6856 buffered_writer.flush(); 5297 6857 } 5298 6858 … … 5315 6875 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 5316 6876 { 5317 FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb"); 5318 return impl::save_file_impl(*this, file, indent, flags, encoding); 6877 using impl::auto_deleter; // MSVC7 workaround 6878 auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose); 6879 6880 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 5319 6881 } 5320 6882 5321 6883 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 5322 6884 { 5323 FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"); 5324 return impl::save_file_impl(*this, file, indent, flags, encoding); 6885 using impl::auto_deleter; // MSVC7 workaround 6886 auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? 
L"w" : L"wb"), fclose); 6887 6888 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 5325 6889 } 5326 6890 5327 6891 PUGI__FN xml_node xml_document::document_element() const 5328 6892 { 6893 assert(_root); 6894 5329 6895 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5330 if ( (i->header & impl::xml_memory_page_type_mask) + 1== node_element)6896 if (PUGI__NODETYPE(i) == node_element) 5331 6897 return xml_node(i); 5332 6898 … … 5339 6905 assert(str); 5340 6906 5341 return impl::as_utf8_impl(str, wcslen(str));6907 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 5342 6908 } 5343 6909 … … 5391 6957 } 5392 6958 5393 PUGI__FN std:: forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)5394 { 5395 return std:: forward_iterator_tag();6959 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) 6960 { 6961 return std::bidirectional_iterator_tag(); 5396 6962 } 5397 6963 } … … 5412 6978 } 5413 6979 5414 PUGI__FN std:: forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)5415 { 5416 return std:: forward_iterator_tag();6980 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) 6981 { 6982 return std::bidirectional_iterator_tag(); 5417 6983 } 5418 6984 } … … 5420 6986 5421 6987 #ifndef PUGIXML_NO_XPATH 5422 5423 6988 // STL replacements 5424 6989 PUGI__NS_BEGIN … … 5475 7040 template <typename I> void reverse(I begin, I end) 5476 7041 { 5477 while ( begin + 1 < end) swap(*begin++, *--end);7042 while (end - begin > 1) swap(*begin++, *--end); 5478 7043 } 5479 7044 … … 5481 7046 { 5482 7047 // fast skip head 5483 while ( begin + 1 < end&& *begin != *(begin + 1)) begin++;7048 while (end - begin > 1 && *begin != *(begin + 1)) begin++; 5484 7049 5485 7050 if (begin == end) return begin; … … 5650 7215 // Allocator used for AST and evaluation stacks 5651 7216 PUGI__NS_BEGIN 7217 static const size_t 
xpath_memory_page_size = 7218 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 7219 PUGIXML_MEMORY_XPATH_PAGE_SIZE 7220 #else 7221 4096 7222 #endif 7223 ; 7224 7225 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); 7226 5652 7227 struct xpath_memory_block 5653 7228 { 5654 7229 xpath_memory_block* next; 5655 5656 char data[ 5657 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 5658 PUGIXML_MEMORY_XPATH_PAGE_SIZE 5659 #else 5660 4096 5661 #endif 5662 ]; 7230 size_t capacity; 7231 7232 union 7233 { 7234 char data[xpath_memory_page_size]; 7235 double alignment; 7236 }; 5663 7237 }; 5664 7238 … … 5682 7256 void* allocate_nothrow(size_t size) 5683 7257 { 5684 const size_t block_capacity = sizeof(_root->data); 5685 5686 // align size so that we're able to store pointers in subsequent blocks 5687 size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); 5688 5689 if (_root_size + size <= block_capacity) 5690 { 5691 void* buf = _root->data + _root_size; 7258 // round size up to block alignment boundary 7259 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7260 7261 if (_root_size + size <= _root->capacity) 7262 { 7263 void* buf = &_root->data[0] + _root_size; 5692 7264 _root_size += size; 5693 7265 return buf; … … 5695 7267 else 5696 7268 { 5697 size_t block_data_size = (size > block_capacity) ? size : block_capacity; 5698 size_t block_size = block_data_size + offsetof(xpath_memory_block, data); 7269 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests 7270 size_t block_capacity_base = sizeof(_root->data); 7271 size_t block_capacity_req = size + block_capacity_base / 4; 7272 size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; 7273 7274 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); 5699 7275 5700 7276 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); … … 5702 7278 5703 7279 block->next = _root; 7280 block->capacity = block_capacity; 5704 7281 5705 7282 _root = block; … … 5729 7306 void* reallocate(void* ptr, size_t old_size, size_t new_size) 5730 7307 { 5731 // align size so that we're able to store pointers in subsequent blocks5732 old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*)- 1);5733 new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*)- 1);7308 // round size up to block alignment boundary 7309 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7310 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 5734 7311 5735 7312 // we can only reallocate the last object 5736 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data+ _root_size);7313 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); 5737 7314 5738 7315 // adjust root size so that we have not allocated the object at all … … 5749 7326 { 5750 7327 // copy old data 5751 assert(new_size > old_size);7328 assert(new_size >= old_size); 5752 7329 memcpy(result, ptr, old_size); 5753 7330 … … 5842 7419 { 5843 7420 blocks[0].next = blocks[1].next = 0; 7421 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); 5844 7422 5845 7423 stack.result = &result; … … 5865 7443 const char_t* _buffer; 5866 7444 bool _uses_heap; 7445 size_t _length_heap; 5867 7446 5868 7447 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) … … 5877 7456 } 5878 7457 5879 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc) 5880 { 5881 return duplicate_string(string, strlength(string), alloc); 7458 xpath_string(const char_t* 
buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7459 { 5882 7460 } 5883 7461 5884 7462 public: 5885 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false) 5886 { 5887 } 5888 5889 explicit xpath_string(const char_t* str, xpath_allocator* alloc) 5890 { 5891 bool empty_ = (*str == 0); 5892 5893 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc); 5894 _uses_heap = !empty_; 5895 } 5896 5897 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap) 5898 { 5899 } 5900 5901 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7463 static xpath_string from_const(const char_t* str) 7464 { 7465 return xpath_string(str, false, 0); 7466 } 7467 7468 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7469 { 7470 assert(begin <= end && *end == 0); 7471 7472 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7473 } 7474 7475 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 5902 7476 { 5903 7477 assert(begin <= end); 5904 7478 5905 bool empty_ = (begin == end); 5906 5907 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc); 5908 _uses_heap = !empty_; 7479 size_t length = static_cast<size_t>(end - begin); 7480 7481 return length == 0 ? 
xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); 7482 } 7483 7484 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7485 { 5909 7486 } 5910 7487 … … 5922 7499 { 5923 7500 // need to make heap copy 5924 size_t target_length = strlength(_buffer);5925 size_t source_length = strlength(o._buffer);7501 size_t target_length = length(); 7502 size_t source_length = o.length(); 5926 7503 size_t result_length = target_length + source_length; 5927 7504 … … 5940 7517 _buffer = result; 5941 7518 _uses_heap = true; 7519 _length_heap = result_length; 5942 7520 } 5943 7521 } … … 5950 7528 size_t length() const 5951 7529 { 5952 return strlength(_buffer);7530 return _uses_heap ? _length_heap : strlength(_buffer); 5953 7531 } 5954 7532 … … 5958 7536 if (!_uses_heap) 5959 7537 { 5960 _buffer = duplicate_string(_buffer, alloc); 7538 size_t length_ = strlength(_buffer); 7539 7540 _buffer = duplicate_string(_buffer, length_, alloc); 5961 7541 _uses_heap = true; 7542 _length_heap = length_; 5962 7543 } 5963 7544 … … 5985 7566 } 5986 7567 }; 5987 5988 PUGI__FN xpath_string xpath_string_const(const char_t* str)5989 {5990 return xpath_string(str, false);5991 }5992 7568 PUGI__NS_END 5993 7569 … … 6032 7608 { 6033 7609 if (na.attribute()) 6034 return xpath_string _const(na.attribute().value());7610 return xpath_string::from_const(na.attribute().value()); 6035 7611 else 6036 7612 { 6037 const xml_node&n = na.node();7613 xml_node n = na.node(); 6038 7614 6039 7615 switch (n.type()) … … 6043 7619 case node_comment: 6044 7620 case node_pi: 6045 return xpath_string _const(n.value());7621 return xpath_string::from_const(n.value()); 6046 7622 6047 7623 case node_document: … … 6055 7631 { 6056 7632 if (cur.type() == node_pcdata || cur.type() == node_cdata) 6057 result.append(xpath_string _const(cur.value()), alloc);7633 result.append(xpath_string::from_const(cur.value()), alloc); 6058 7634 6059 7635 if (cur.first_child()) … … 6079 7655 } 
6080 7656 6081 PUGI__FN unsigned int node_height(xml_node n) 6082 { 6083 unsigned int result = 0; 6084 6085 while (n) 6086 { 6087 ++result; 6088 n = n.parent(); 6089 } 6090 6091 return result; 7657 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) 7658 { 7659 assert(ln->parent == rn->parent); 7660 7661 // there is no common ancestor (the shared parent is null), nodes are from different documents 7662 if (!ln->parent) return ln < rn; 7663 7664 // determine sibling order 7665 xml_node_struct* ls = ln; 7666 xml_node_struct* rs = rn; 7667 7668 while (ls && rs) 7669 { 7670 if (ls == rn) return true; 7671 if (rs == ln) return false; 7672 7673 ls = ls->next_sibling; 7674 rs = rs->next_sibling; 7675 } 7676 7677 // if rn sibling chain ended ln must be before rn 7678 return !rs; 6092 7679 } 6093 7680 6094 PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh) 6095 { 6096 // normalize heights 6097 for (unsigned int i = rh; i < lh; i++) ln = ln.parent(); 6098 for (unsigned int j = lh; j < rh; j++) rn = rn.parent(); 6099 7681 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) 7682 { 7683 // find common ancestor at the same depth, if any 7684 xml_node_struct* lp = ln; 7685 xml_node_struct* rp = rn; 7686 7687 while (lp && rp && lp->parent != rp->parent) 7688 { 7689 lp = lp->parent; 7690 rp = rp->parent; 7691 } 7692 7693 // parents are the same! 
7694 if (lp && rp) return node_is_before_sibling(lp, rp); 7695 7696 // nodes are at different depths, need to normalize heights 7697 bool left_higher = !lp; 7698 7699 while (lp) 7700 { 7701 lp = lp->parent; 7702 ln = ln->parent; 7703 } 7704 7705 while (rp) 7706 { 7707 rp = rp->parent; 7708 rn = rn->parent; 7709 } 7710 6100 7711 // one node is the ancestor of the other 6101 if (ln == rn) return lh < rh; 6102 6103 // find common ancestor 6104 while (ln.parent() != rn.parent()) 6105 { 6106 ln = ln.parent(); 6107 rn = rn.parent(); 6108 } 6109 6110 // there is no common ancestor (the shared parent is null), nodes are from different documents 6111 if (!ln.parent()) return ln < rn; 6112 6113 // determine sibling order 6114 for (; ln; ln = ln.next_sibling()) 6115 if (ln == rn) 6116 return true; 6117 6118 return false; 6119 } 6120 6121 PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node) 6122 { 6123 while (node && node != parent) node = node.parent(); 7712 if (ln == rn) return left_higher; 7713 7714 // find common ancestor... 
again 7715 while (ln->parent != rn->parent) 7716 { 7717 ln = ln->parent; 7718 rn = rn->parent; 7719 } 7720 7721 return node_is_before_sibling(ln, rn); 7722 } 7723 7724 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 7725 { 7726 while (node && node != parent) node = node->parent; 6124 7727 6125 7728 return parent && node == parent; 6126 7729 } 6127 7730 6128 PUGI__FN const void* document_ order(const xpath_node& xnode)7731 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 6129 7732 { 6130 7733 xml_node_struct* node = xnode.node().internal_object(); … … 6132 7735 if (node) 6133 7736 { 6134 if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name; 6135 if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value; 7737 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 7738 { 7739 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 7740 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 7741 } 7742 6136 7743 return 0; 6137 7744 } … … 6141 7748 if (attr) 6142 7749 { 6143 if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name; 6144 if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value; 7750 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 7751 { 7752 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 7753 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 7754 } 7755 6145 7756 return 0; 6146 7757 } … … 6154 7765 { 6155 7766 // optimized document order based check 6156 const void* lo = document_ order(lhs);6157 const void* ro = document_ order(rhs);7767 const void* lo = document_buffer_order(lhs); 7768 const void* ro = 
document_buffer_order(rhs); 6158 7769 6159 7770 if (lo && ro) return lo < ro; … … 6196 7807 6197 7808 if (ln == rn) return false; 7809 7810 if (!ln || !rn) return ln < rn; 6198 7811 6199 unsigned int lh = node_height(ln); 6200 unsigned int rh = node_height(rn); 6201 6202 return node_is_before(ln, lh, rn, rh); 7812 return node_is_before(ln.internal_object(), rn.internal_object()); 6203 7813 } 6204 7814 }; … … 6334 7944 // try special number conversion 6335 7945 const char_t* special = convert_number_to_string_special(value); 6336 if (special) return xpath_string _const(special);7946 if (special) return xpath_string::from_const(special); 6337 7947 6338 7948 // get mantissa + exponent form 6339 char mantissa_buffer[ 64];7949 char mantissa_buffer[32]; 6340 7950 6341 7951 char* mantissa; … … 6343 7953 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); 6344 7954 7955 // allocate a buffer of suitable length for the number 7956 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 7957 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 7958 assert(result); 7959 6345 7960 // make the number! 6346 char_t result[512];6347 7961 char_t* s = result; 6348 7962 … … 6359 7973 while (exponent > 0) 6360 7974 { 6361 assert(*mantissa == 0 || static_cast<unsigned int>( *mantissa- '0') <= 9);7975 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9); 6362 7976 *s++ = *mantissa ? 
*mantissa++ : '0'; 6363 7977 exponent--; … … 6387 8001 6388 8002 // zero-terminate 6389 assert(s < result + sizeof(result) / sizeof(result[0]));8003 assert(s < result + result_size); 6390 8004 *s = 0; 6391 8005 6392 return xpath_string (result, alloc);8006 return xpath_string::from_heap_preallocated(result, s); 6393 8007 } 6394 8008 … … 6432 8046 return wcstod(string, 0); 6433 8047 #else 6434 return atof(string);8048 return strtod(string, 0); 6435 8049 #endif 6436 8050 } 6437 8051 6438 PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result) 6439 { 6440 char_t buffer[32]; 6441 8052 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8053 { 6442 8054 size_t length = static_cast<size_t>(end - begin); 6443 8055 char_t* scratch = buffer; … … 6500 8112 } 6501 8113 6502 bool operator()( const xml_attribute&a) const8114 bool operator()(xml_attribute a) const 6503 8115 { 6504 8116 const char_t* name = a.name(); … … 6510 8122 }; 6511 8123 6512 PUGI__FN const char_t* namespace_uri( const xml_node&node)8124 PUGI__FN const char_t* namespace_uri(xml_node node) 6513 8125 { 6514 8126 namespace_uri_predicate pred = node.name(); … … 6528 8140 } 6529 8141 6530 PUGI__FN const char_t* namespace_uri( const xml_attribute& attr, const xml_node&parent)8142 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 6531 8143 { 6532 8144 namespace_uri_predicate pred = attr.name(); … … 6554 8166 } 6555 8167 6556 PUGI__FN voidnormalize_space(char_t* buffer)8168 PUGI__FN char_t* normalize_space(char_t* buffer) 6557 8169 { 6558 8170 char_t* write = buffer; … … 6578 8190 // zero-terminate 6579 8191 *write = 0; 6580 } 6581 6582 PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)6583 { 6584 size_t to_length = strlength(to);6585 8192 8193 return write; 8194 } 8195 8196 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const 
char_t* to, size_t to_length) 8197 { 6586 8198 char_t* write = buffer; 6587 8199 … … 6600 8212 // zero-terminate 6601 8213 *write = 0; 8214 8215 return write; 8216 } 8217 8218 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8219 { 8220 unsigned char table[128] = {0}; 8221 8222 while (*from) 8223 { 8224 unsigned int fc = static_cast<unsigned int>(*from); 8225 unsigned int tc = static_cast<unsigned int>(*to); 8226 8227 if (fc >= 128 || tc >= 128) 8228 return 0; 8229 8230 // code=128 means "skip character" 8231 if (!table[fc]) 8232 table[fc] = static_cast<unsigned char>(tc ? tc : 128); 8233 8234 from++; 8235 if (tc) to++; 8236 } 8237 8238 for (int i = 0; i < 128; ++i) 8239 if (!table[i]) 8240 table[i] = static_cast<unsigned char>(i); 8241 8242 void* result = alloc->allocate_nothrow(sizeof(table)); 8243 8244 if (result) 8245 { 8246 memcpy(result, table, sizeof(table)); 8247 } 8248 8249 return static_cast<unsigned char*>(result); 8250 } 8251 8252 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8253 { 8254 char_t* write = buffer; 8255 8256 while (*buffer) 8257 { 8258 char_t ch = *buffer++; 8259 unsigned int index = static_cast<unsigned int>(ch); 8260 8261 if (index < 128) 8262 { 8263 unsigned char code = table[index]; 8264 8265 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8266 // this code skips these characters without extra branches 8267 *write = static_cast<char_t>(code); 8268 write += 1 - (code >> 7); 8269 } 8270 else 8271 { 8272 *write++ = ch; 8273 } 8274 } 8275 8276 // zero-terminate 8277 *write = 0; 8278 8279 return write; 8280 } 8281 8282 inline bool is_xpath_attribute(const char_t* name) 8283 { 8284 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 6602 8285 } 6603 8286 6604 8287 struct xpath_variable_boolean: xpath_variable 6605 8288 { 6606 xpath_variable_boolean(): value(false)8289 
xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 6607 8290 { 6608 8291 } … … 6614 8297 struct xpath_variable_number: xpath_variable 6615 8298 { 6616 xpath_variable_number(): value(0)8299 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 6617 8300 { 6618 8301 } … … 6624 8307 struct xpath_variable_string: xpath_variable 6625 8308 { 6626 xpath_variable_string(): value(0)8309 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 6627 8310 { 6628 8311 } … … 6639 8322 struct xpath_variable_node_set: xpath_variable 6640 8323 { 8324 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8325 { 8326 } 8327 6641 8328 xpath_node_set value; 6642 8329 char_t name[1]; … … 6732 8419 } 6733 8420 6734 PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end) 6735 { 6736 char_t buffer[32]; 6737 8421 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8422 { 8423 switch (rhs->type()) 8424 { 8425 case xpath_type_node_set: 8426 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8427 8428 case xpath_type_number: 8429 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8430 8431 case xpath_type_string: 8432 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8433 8434 case xpath_type_boolean: 8435 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8436 8437 default: 8438 assert(!"Invalid variable type"); 8439 return false; 8440 } 8441 } 8442 8443 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8444 { 6738 8445 size_t length = static_cast<size_t>(end - begin); 6739 8446 char_t* scratch = buffer; … … 6743 8450 // need to make dummy on-heap copy 6744 8451 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 6745 if (!scratch) return 
0;8452 if (!scratch) return false; 6746 8453 } 6747 8454 … … 6750 8457 scratch[length] = 0; 6751 8458 6752 xpath_variable*result = set->get(scratch);8459 *out_result = set->get(scratch); 6753 8460 6754 8461 // free dummy buffer 6755 8462 if (scratch != buffer) xml_memory::deallocate(scratch); 6756 8463 6757 return result;8464 return true; 6758 8465 } 6759 8466 PUGI__NS_END … … 6761 8468 // Internal node set class 6762 8469 PUGI__NS_BEGIN 8470 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8471 { 8472 if (end - begin < 2) 8473 return xpath_node_set::type_sorted; 8474 8475 document_order_comparator cmp; 8476 8477 bool first = cmp(begin[0], begin[1]); 8478 8479 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8480 if (cmp(it[0], it[1]) != first) 8481 return xpath_node_set::type_unsorted; 8482 8483 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8484 } 8485 6763 8486 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 6764 8487 { … … 6767 8490 if (type == xpath_node_set::type_unsorted) 6768 8491 { 6769 sort(begin, end, document_order_comparator()); 6770 6771 type = xpath_node_set::type_sorted; 8492 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8493 8494 if (sorted == xpath_node_set::type_unsorted) 8495 { 8496 sort(begin, end, document_order_comparator()); 8497 8498 type = xpath_node_set::type_sorted; 8499 } 8500 else 8501 type = sorted; 6772 8502 } 6773 8503 … … 6836 8566 } 6837 8567 8568 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8569 6838 8570 void push_back(const xpath_node& node, xpath_allocator* alloc) 6839 8571 { 6840 if (_end == _eos) 6841 { 6842 size_t capacity = static_cast<size_t>(_eos - _begin); 6843 6844 // get new capacity (1.5x rule) 6845 size_t new_capacity = capacity + capacity / 2 + 1; 6846 6847 // reallocate the old array or allocate a new one 6848 
xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 6849 assert(data); 6850 6851 // finalize 6852 _begin = data; 6853 _end = data + capacity; 6854 _eos = data + new_capacity; 6855 } 6856 6857 *_end++ = node; 8572 if (_end != _eos) 8573 *_end++ = node; 8574 else 8575 push_back_grow(node, alloc); 6858 8576 } 6859 8577 6860 8578 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 6861 8579 { 8580 if (begin_ == end_) return; 8581 6862 8582 size_t size_ = static_cast<size_t>(_end - _begin); 6863 8583 size_t capacity = static_cast<size_t>(_eos - _begin); … … 6910 8630 } 6911 8631 }; 8632 8633 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 8634 { 8635 size_t capacity = static_cast<size_t>(_eos - _begin); 8636 8637 // get new capacity (1.5x rule) 8638 size_t new_capacity = capacity + capacity / 2 + 1; 8639 8640 // reallocate the old array or allocate a new one 8641 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 8642 assert(data); 8643 8644 // finalize 8645 _begin = data; 8646 _end = data + capacity; 8647 _eos = data + new_capacity; 8648 8649 // push 8650 *_end++ = node; 8651 } 6912 8652 PUGI__NS_END 6913 8653 … … 7279 9019 enum ast_type_t 7280 9020 { 9021 ast_unknown, 7281 9022 ast_op_or, // left or right 7282 9023 ast_op_and, // left and right … … 7296 9037 ast_predicate, // apply predicate to set; next points to next predicate 7297 9038 ast_filter, // select * from left where right 7298 ast_filter_posinv, // select * from left where right; proximity position invariant7299 9039 ast_string_constant, // string constant 7300 9040 ast_number_constant, // number constant … … 7336 9076 ast_func_round, // round(left) 7337 9077 ast_step, // process set left with step 7338 ast_step_root // select root node 9078 
ast_step_root, // select root node 9079 9080 ast_opt_translate_table, // translate(left, right, third) where right/third are constants 9081 ast_opt_compare_attribute // @name = 'string' 7339 9082 }; 7340 9083 … … 7369 9112 }; 7370 9113 9114 enum predicate_t 9115 { 9116 predicate_default, 9117 predicate_posinv, 9118 predicate_constant, 9119 predicate_constant_one 9120 }; 9121 9122 enum nodeset_eval_t 9123 { 9124 nodeset_eval_all, 9125 nodeset_eval_any, 9126 nodeset_eval_first 9127 }; 9128 7371 9129 template <axis_t N> struct axis_to_type 7372 9130 { … … 7383 9141 char _rettype; 7384 9142 7385 // for ast_step / ast_predicate9143 // for ast_step 7386 9144 char _axis; 9145 9146 // for ast_step/ast_predicate/ast_filter 7387 9147 char _test; 7388 9148 … … 7402 9162 // node test for ast_step (node name/namespace/node type/pi target) 7403 9163 const char_t* nodetest; 9164 // table for ast_opt_translate_table 9165 const unsigned char* table; 7404 9166 } _data; 7405 9167 … … 7431 9193 xpath_allocator_capture cr(stack.result); 7432 9194 7433 xpath_node_set_raw ls = lhs->eval_node_set(c, stack );7434 xpath_node_set_raw rs = rhs->eval_node_set(c, stack );9195 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9196 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 7435 9197 7436 9198 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) … … 7460 9222 7461 9223 double l = lhs->eval_number(c, stack); 7462 xpath_node_set_raw rs = rhs->eval_node_set(c, stack );9224 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 7463 9225 7464 9226 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) … … 7477 9239 7478 9240 xpath_string l = lhs->eval_string(c, stack); 7479 xpath_node_set_raw rs = rhs->eval_node_set(c, stack );9241 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 7480 9242 7481 9243 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) … … 7495 9257 } 7496 9258 9259 
static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) 9260 { 9261 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; 9262 } 9263 7497 9264 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 7498 9265 { … … 7505 9272 xpath_allocator_capture cr(stack.result); 7506 9273 7507 xpath_node_set_raw ls = lhs->eval_node_set(c, stack );7508 xpath_node_set_raw rs = rhs->eval_node_set(c, stack );9274 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9275 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 7509 9276 7510 9277 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) … … 7530 9297 7531 9298 double l = lhs->eval_number(c, stack); 7532 xpath_node_set_raw rs = rhs->eval_node_set(c, stack );9299 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 7533 9300 7534 9301 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) … … 7546 9313 xpath_allocator_capture cr(stack.result); 7547 9314 7548 xpath_node_set_raw ls = lhs->eval_node_set(c, stack );9315 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 7549 9316 double r = rhs->eval_number(c, stack); 7550 9317 … … 7566 9333 } 7567 9334 7568 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)9335 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 7569 9336 { 7570 9337 assert(ns.size() >= first); 9338 assert(expr->rettype() != xpath_type_number); 7571 9339 7572 9340 size_t i = 1; 7573 9341 size_t size = ns.size() - first; 7574 9342 7575 9343 xpath_node* last = ns.begin() + first; 7576 9344 7577 9345 // remove_if... 
or well, sort of 7578 9346 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 7579 9347 { 7580 9348 xpath_context c(*it, i, size); 7581 7582 if (expr->rettype() == xpath_type_number) 7583 { 7584 if (expr->eval_number(c, stack) == i) 7585 *last++ = *it; 7586 } 7587 else if (expr->eval_boolean(c, stack)) 9349 9350 if (expr->eval_boolean(c, stack)) 9351 { 7588 9352 *last++ = *it; 7589 } 7590 9353 9354 if (once) break; 9355 } 9356 } 9357 7591 9358 ns.truncate(last); 7592 9359 } 7593 9360 7594 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack) 9361 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9362 { 9363 assert(ns.size() >= first); 9364 assert(expr->rettype() == xpath_type_number); 9365 9366 size_t i = 1; 9367 size_t size = ns.size() - first; 9368 9369 xpath_node* last = ns.begin() + first; 9370 9371 // remove_if... or well, sort of 9372 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9373 { 9374 xpath_context c(*it, i, size); 9375 9376 if (expr->eval_number(c, stack) == i) 9377 { 9378 *last++ = *it; 9379 9380 if (once) break; 9381 } 9382 } 9383 9384 ns.truncate(last); 9385 } 9386 9387 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) 9388 { 9389 assert(ns.size() >= first); 9390 assert(expr->rettype() == xpath_type_number); 9391 9392 size_t size = ns.size() - first; 9393 9394 xpath_node* last = ns.begin() + first; 9395 9396 xpath_context c(xpath_node(), 1, size); 9397 9398 double er = expr->eval_number(c, stack); 9399 9400 if (er >= 1.0 && er <= size) 9401 { 9402 size_t eri = static_cast<size_t>(er); 9403 9404 if (er == eri) 9405 { 9406 xpath_node r = last[eri - 1]; 9407 9408 *last++ = r; 9409 } 9410 } 9411 9412 ns.truncate(last); 9413 } 9414 9415 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) 7595 9416 { 7596 9417 
if (ns.size() == first) return; 7597 9418 9419 assert(_type == ast_filter || _type == ast_predicate); 9420 9421 if (_test == predicate_constant || _test == predicate_constant_one) 9422 apply_predicate_number_const(ns, first, _right, stack); 9423 else if (_right->rettype() == xpath_type_number) 9424 apply_predicate_number(ns, first, _right, stack, once); 9425 else 9426 apply_predicate_boolean(ns, first, _right, stack, once); 9427 } 9428 9429 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) 9430 { 9431 if (ns.size() == first) return; 9432 9433 bool last_once = eval_once(ns.type(), eval); 9434 7598 9435 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) 7599 { 7600 apply_predicate(ns, first, pred->_left, stack); 7601 } 7602 } 7603 7604 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc) 7605 { 7606 if (!a) return; 7607 7608 const char_t* name = a.name(); 7609 7610 // There are no attribute nodes corresponding to attributes that declare namespaces 7611 // That is, "xmlns:..." or "xmlns" 7612 if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return; 7613 9436 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); 9437 } 9438 9439 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) 9440 { 9441 assert(a); 9442 9443 const char_t* name = a->name ? 
a->name + 0 : PUGIXML_TEXT(""); 9444 7614 9445 switch (_test) 7615 9446 { 7616 9447 case nodetest_name: 7617 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc); 9448 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) 9449 { 9450 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9451 return true; 9452 } 7618 9453 break; 7619 9454 7620 9455 case nodetest_type_node: 7621 9456 case nodetest_all: 7622 ns.push_back(xpath_node(a, parent), alloc); 9457 if (is_xpath_attribute(name)) 9458 { 9459 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9460 return true; 9461 } 7623 9462 break; 7624 9463 7625 9464 case nodetest_all_in_namespace: 7626 if (starts_with(name, _data.nodetest)) 7627 ns.push_back(xpath_node(a, parent), alloc); 9465 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) 9466 { 9467 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9468 return true; 9469 } 7628 9470 break; 7629 9471 … … 7631 9473 ; 7632 9474 } 9475 9476 return false; 7633 9477 } 7634 9478 7635 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc) 7636 { 7637 if (!n) return; 9479 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) 9480 { 9481 assert(n); 9482 9483 xml_node_type type = PUGI__NODETYPE(n); 7638 9484 7639 9485 switch (_test) 7640 9486 { 7641 9487 case nodetest_name: 7642 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc); 9488 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) 9489 { 9490 ns.push_back(xml_node(n), alloc); 9491 return true; 9492 } 7643 9493 break; 7644 9494 7645 9495 case nodetest_type_node: 7646 ns.push_back( n, alloc);7647 break;9496 ns.push_back(xml_node(n), alloc); 9497 return true; 7648 9498 7649 9499 case nodetest_type_comment: 7650 if (n.type() == node_comment) 7651 ns.push_back(n, alloc); 9500 if (type == node_comment) 9501 { 9502 
ns.push_back(xml_node(n), alloc); 9503 return true; 9504 } 7652 9505 break; 7653 9506 7654 9507 case nodetest_type_text: 7655 if (n.type() == node_pcdata || n.type() == node_cdata) 7656 ns.push_back(n, alloc); 9508 if (type == node_pcdata || type == node_cdata) 9509 { 9510 ns.push_back(xml_node(n), alloc); 9511 return true; 9512 } 7657 9513 break; 7658 9514 7659 9515 case nodetest_type_pi: 7660 if (n.type() == node_pi) 7661 ns.push_back(n, alloc); 9516 if (type == node_pi) 9517 { 9518 ns.push_back(xml_node(n), alloc); 9519 return true; 9520 } 7662 9521 break; 7663 9522 7664 9523 case nodetest_pi: 7665 if (n.type() == node_pi && strequal(n.name(), _data.nodetest)) 7666 ns.push_back(n, alloc); 9524 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) 9525 { 9526 ns.push_back(xml_node(n), alloc); 9527 return true; 9528 } 7667 9529 break; 7668 9530 7669 9531 case nodetest_all: 7670 if (n.type() == node_element) 7671 ns.push_back(n, alloc); 9532 if (type == node_element) 9533 { 9534 ns.push_back(xml_node(n), alloc); 9535 return true; 9536 } 7672 9537 break; 7673 9538 7674 9539 case nodetest_all_in_namespace: 7675 if (n.type() == node_element && starts_with(n.name(), _data.nodetest)) 7676 ns.push_back(n, alloc); 9540 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) 9541 { 9542 ns.push_back(xml_node(n), alloc); 9543 return true; 9544 } 7677 9545 break; 7678 9546 7679 9547 default: 7680 9548 assert(!"Unknown axis"); 7681 } 7682 } 7683 7684 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T) 9549 } 9550 9551 return false; 9552 } 9553 9554 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) 7685 9555 { 7686 9556 const axis_t axis = T::axis; … … 7690 9560 case axis_attribute: 7691 9561 { 7692 for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute()) 7693 step_push(ns, a, n, alloc); 9562 for 
(xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) 9563 if (step_push(ns, a, n, alloc) & once) 9564 return; 7694 9565 7695 9566 break; … … 7698 9569 case axis_child: 7699 9570 { 7700 for (xml_node c = n.first_child(); c; c = c.next_sibling()) 7701 step_push(ns, c, alloc); 9571 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) 9572 if (step_push(ns, c, alloc) & once) 9573 return; 7702 9574 7703 9575 break; … … 7708 9580 { 7709 9581 if (axis == axis_descendant_or_self) 7710 step_push(ns, n, alloc); 9582 if (step_push(ns, n, alloc) & once) 9583 return; 7711 9584 7712 xml_node cur = n.first_child();9585 xml_node_struct* cur = n->first_child; 7713 9586 7714 while (cur && cur != n) 7715 { 7716 step_push(ns, cur, alloc); 9587 while (cur) 9588 { 9589 if (step_push(ns, cur, alloc) & once) 9590 return; 7717 9591 7718 if (cur.first_child()) 7719 cur = cur.first_child(); 7720 else if (cur.next_sibling()) 7721 cur = cur.next_sibling(); 9592 if (cur->first_child) 9593 cur = cur->first_child; 7722 9594 else 7723 9595 { 7724 while (!cur.next_sibling() && cur != n) 7725 cur = cur.parent(); 9596 while (!cur->next_sibling) 9597 { 9598 cur = cur->parent; 9599 9600 if (cur == n) return; 9601 } 7726 9602 7727 if (cur != n) cur = cur.next_sibling();9603 cur = cur->next_sibling; 7728 9604 } 7729 9605 } … … 7734 9610 case axis_following_sibling: 7735 9611 { 7736 for (xml_node c = n.next_sibling(); c; c = c.next_sibling()) 7737 step_push(ns, c, alloc); 9612 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) 9613 if (step_push(ns, c, alloc) & once) 9614 return; 7738 9615 7739 9616 break; … … 7742 9619 case axis_preceding_sibling: 7743 9620 { 7744 for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling()) 7745 step_push(ns, c, alloc); 9621 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) 9622 if (step_push(ns, c, alloc) & once) 9623 return; 7746 9624 7747 9625 break; … … 7750 9628 case 
axis_following: 7751 9629 { 7752 xml_node cur = n;9630 xml_node_struct* cur = n; 7753 9631 7754 9632 // exit from this node so that we don't include descendants 7755 while (cur && !cur.next_sibling()) cur = cur.parent(); 7756 cur = cur.next_sibling(); 7757 7758 for (;;) 7759 { 7760 step_push(ns, cur, alloc); 7761 7762 if (cur.first_child()) 7763 cur = cur.first_child(); 7764 else if (cur.next_sibling()) 7765 cur = cur.next_sibling(); 9633 while (!cur->next_sibling) 9634 { 9635 cur = cur->parent; 9636 9637 if (!cur) return; 9638 } 9639 9640 cur = cur->next_sibling; 9641 9642 while (cur) 9643 { 9644 if (step_push(ns, cur, alloc) & once) 9645 return; 9646 9647 if (cur->first_child) 9648 cur = cur->first_child; 7766 9649 else 7767 9650 { 7768 while (cur && !cur.next_sibling()) cur = cur.parent(); 7769 cur = cur.next_sibling(); 7770 7771 if (!cur) break; 9651 while (!cur->next_sibling) 9652 { 9653 cur = cur->parent; 9654 9655 if (!cur) return; 9656 } 9657 9658 cur = cur->next_sibling; 7772 9659 } 7773 9660 } … … 7778 9665 case axis_preceding: 7779 9666 { 7780 xml_node cur = n; 7781 7782 while (cur && !cur.previous_sibling()) cur = cur.parent(); 7783 cur = cur.previous_sibling(); 7784 7785 for (;;) 7786 { 7787 if (cur.last_child()) 7788 cur = cur.last_child(); 9667 xml_node_struct* cur = n; 9668 9669 // exit from this node so that we don't include descendants 9670 while (!cur->prev_sibling_c->next_sibling) 9671 { 9672 cur = cur->parent; 9673 9674 if (!cur) return; 9675 } 9676 9677 cur = cur->prev_sibling_c; 9678 9679 while (cur) 9680 { 9681 if (cur->first_child) 9682 cur = cur->first_child->prev_sibling_c; 7789 9683 else 7790 9684 { 7791 9685 // leaf node, can't be ancestor 7792 step_push(ns, cur, alloc); 7793 7794 if (cur.previous_sibling()) 7795 cur = cur.previous_sibling(); 7796 else 9686 if (step_push(ns, cur, alloc) & once) 9687 return; 9688 9689 while (!cur->prev_sibling_c->next_sibling) 7797 9690 { 7798 do 7799 { 7800 cur = cur.parent(); 7801 if (!cur) break; 7802 
7803 if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); 7804 } 7805 while (!cur.previous_sibling()); 7806 7807 cur = cur.previous_sibling(); 7808 7809 if (!cur) break; 9691 cur = cur->parent; 9692 9693 if (!cur) return; 9694 9695 if (!node_is_ancestor(cur, n)) 9696 if (step_push(ns, cur, alloc) & once) 9697 return; 7810 9698 } 9699 9700 cur = cur->prev_sibling_c; 7811 9701 } 7812 9702 } … … 7819 9709 { 7820 9710 if (axis == axis_ancestor_or_self) 7821 step_push(ns, n, alloc); 7822 7823 xml_node cur = n.parent(); 9711 if (step_push(ns, n, alloc) & once) 9712 return; 9713 9714 xml_node_struct* cur = n->parent; 7824 9715 7825 9716 while (cur) 7826 9717 { 7827 step_push(ns, cur, alloc); 9718 if (step_push(ns, cur, alloc) & once) 9719 return; 7828 9720 7829 cur = cur .parent();9721 cur = cur->parent; 7830 9722 } 7831 9723 … … 7842 9734 case axis_parent: 7843 9735 { 7844 if (n.parent()) step_push(ns, n.parent(), alloc); 9736 if (n->parent) 9737 step_push(ns, n->parent, alloc); 7845 9738 7846 9739 break; … … 7852 9745 } 7853 9746 7854 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)9747 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 7855 9748 { 7856 9749 const axis_t axis = T::axis; … … 7862 9755 { 7863 9756 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 7864 step_push(ns, a, p, alloc); 7865 7866 xml_node cur = p; 9757 if (step_push(ns, a, p, alloc) & once) 9758 return; 9759 9760 xml_node_struct* cur = p; 7867 9761 7868 9762 while (cur) 7869 9763 { 7870 step_push(ns, cur, alloc); 9764 if (step_push(ns, cur, alloc) & once) 9765 return; 7871 9766 7872 cur = cur .parent();9767 cur = cur->parent; 7873 9768 } 7874 9769 … … 7887 9782 case axis_following: 7888 9783 { 7889 xml_node cur = p;9784 xml_node_struct* cur = p; 7890 
9785 7891 for (;;) 7892 { 7893 if (cur.first_child()) 7894 cur = cur.first_child(); 7895 else if (cur.next_sibling()) 7896 cur = cur.next_sibling(); 9786 while (cur) 9787 { 9788 if (cur->first_child) 9789 cur = cur->first_child; 7897 9790 else 7898 9791 { 7899 while (cur && !cur.next_sibling()) cur = cur.parent(); 7900 cur = cur.next_sibling(); 7901 7902 if (!cur) break; 9792 while (!cur->next_sibling) 9793 { 9794 cur = cur->parent; 9795 9796 if (!cur) return; 9797 } 9798 9799 cur = cur->next_sibling; 7903 9800 } 7904 9801 7905 step_push(ns, cur, alloc); 9802 if (step_push(ns, cur, alloc) & once) 9803 return; 7906 9804 } 7907 9805 … … 7919 9817 { 7920 9818 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 7921 step_fill(ns, p, alloc, v);9819 step_fill(ns, p, alloc, once, v); 7922 9820 break; 7923 9821 } … … 7927 9825 } 7928 9826 } 7929 7930 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)9827 9828 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 7931 9829 { 7932 9830 const axis_t axis = T::axis; 7933 bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 9831 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 9832 9833 if (xn.node()) 9834 step_fill(ns, xn.node().internal_object(), alloc, once, v); 9835 else if (axis_has_attributes && xn.attribute() && xn.parent()) 9836 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 9837 } 9838 9839 template <class T> xpath_node_set_raw 
step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 9840 { 9841 const axis_t axis = T::axis; 9842 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 9843 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 9844 9845 bool once = 9846 (axis == axis_attribute && _test == nodetest_name) || 9847 (!_right && eval_once(axis_type, eval)) || 9848 (_right && !_right->_next && _right->_test == predicate_constant_one); 7934 9849 7935 9850 xpath_node_set_raw ns; 7936 ns.set_type( (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);9851 ns.set_type(axis_type); 7937 9852 7938 9853 if (_left) 7939 9854 { 7940 xpath_node_set_raw s = _left->eval_node_set(c, stack );9855 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 7941 9856 7942 9857 // self axis preserves the original order … … 7950 9865 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 7951 9866 7952 if (it->node()) 7953 step_fill(ns, it->node(), stack.result, v); 7954 else if (attributes) 7955 step_fill(ns, it->attribute(), it->parent(), stack.result, v); 7956 7957 apply_predicates(ns, size, stack); 9867 step_fill(ns, *it, stack.result, once, v); 9868 if (_right) apply_predicates(ns, size, stack, eval); 7958 9869 } 7959 9870 } 7960 9871 else 7961 9872 { 7962 if (c.n.node()) 7963 step_fill(ns, c.n.node(), stack.result, v); 7964 else if (attributes) 7965 step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v); 7966 7967 apply_predicates(ns, 0, stack); 9873 step_fill(ns, c.n, stack.result, once, v); 9874 if (_right) apply_predicates(ns, 0, stack, eval); 7968 9875 } 7969 9876 … … 8006 9913 _type(static_cast<char>(type)), 
_rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) 8007 9914 { 9915 assert(type == ast_step); 8008 9916 _data.nodetest = contents; 9917 } 9918 9919 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): 9920 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) 9921 { 9922 assert(type == ast_filter || type == ast_predicate); 8009 9923 } 8010 9924 … … 8109 10023 } 8110 10024 10025 case ast_opt_compare_attribute: 10026 { 10027 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); 10028 10029 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); 10030 10031 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); 10032 } 10033 8111 10034 case ast_variable: 8112 10035 { … … 8137 10060 xpath_allocator_capture cr(stack.result); 8138 10061 8139 return !eval_node_set(c, stack ).empty();10062 return !eval_node_set(c, stack, nodeset_eval_any).empty(); 8140 10063 } 8141 10064 … … 8183 10106 xpath_allocator_capture cr(stack.result); 8184 10107 8185 return static_cast<double>(_left->eval_node_set(c, stack ).size());10108 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); 8186 10109 } 8187 10110 … … 8216 10139 double r = 0; 8217 10140 8218 xpath_node_set_raw ns = _left->eval_node_set(c, stack );10141 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); 8219 10142 8220 10143 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) … … 8331 10254 *ri = 0; 8332 10255 8333 return xpath_string (result, true);10256 return xpath_string::from_heap_preallocated(result, ri); 8334 10257 } 8335 10258 … … 8339 10262 { 8340 10263 case ast_string_constant: 8341 return xpath_string _const(_data.string);10264 return 
xpath_string::from_const(_data.string); 8342 10265 8343 10266 case ast_func_local_name_0: … … 8345 10268 xpath_node na = c.n; 8346 10269 8347 return xpath_string _const(local_name(na));10270 return xpath_string::from_const(local_name(na)); 8348 10271 } 8349 10272 … … 8352 10275 xpath_allocator_capture cr(stack.result); 8353 10276 8354 xpath_node_set_raw ns = _left->eval_node_set(c, stack );10277 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 8355 10278 xpath_node na = ns.first(); 8356 10279 8357 return xpath_string _const(local_name(na));10280 return xpath_string::from_const(local_name(na)); 8358 10281 } 8359 10282 … … 8362 10285 xpath_node na = c.n; 8363 10286 8364 return xpath_string _const(qualified_name(na));10287 return xpath_string::from_const(qualified_name(na)); 8365 10288 } 8366 10289 … … 8369 10292 xpath_allocator_capture cr(stack.result); 8370 10293 8371 xpath_node_set_raw ns = _left->eval_node_set(c, stack );10294 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 8372 10295 xpath_node na = ns.first(); 8373 10296 8374 return xpath_string _const(qualified_name(na));10297 return xpath_string::from_const(qualified_name(na)); 8375 10298 } 8376 10299 … … 8379 10302 xpath_node na = c.n; 8380 10303 8381 return xpath_string _const(namespace_uri(na));10304 return xpath_string::from_const(namespace_uri(na)); 8382 10305 } 8383 10306 … … 8386 10309 xpath_allocator_capture cr(stack.result); 8387 10310 8388 xpath_node_set_raw ns = _left->eval_node_set(c, stack );10311 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 8389 10312 xpath_node na = ns.first(); 8390 10313 8391 return xpath_string _const(namespace_uri(na));10314 return xpath_string::from_const(namespace_uri(na)); 8392 10315 } 8393 10316 … … 8412 10335 const char_t* pos = find_substring(s.c_str(), p.c_str()); 8413 10336 8414 return pos ? xpath_string (s.c_str(), pos, stack.result) : xpath_string();10337 return pos ? 
xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); 8415 10338 } 8416 10339 … … 8427 10350 if (!pos) return xpath_string(); 8428 10351 8429 const char_t* result = pos + p.length(); 8430 8431 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result); 10352 const char_t* rbegin = pos + p.length(); 10353 const char_t* rend = s.c_str() + s.length(); 10354 10355 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 8432 10356 } 8433 10357 … … 8450 10374 8451 10375 const char_t* rbegin = s.c_str() + (pos - 1); 10376 const char_t* rend = s.c_str() + s.length(); 8452 10377 8453 return s.uses_heap() ? xpath_string (rbegin, stack.result) : xpath_string_const(rbegin);10378 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 8454 10379 } 8455 10380 … … 8478 10403 const char_t* rend = s.c_str() + (end - 1); 8479 10404 8480 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string _const(rbegin) : xpath_string(rbegin, rend, stack.result);10405 return (end == s_length + 1 && !s.uses_heap()) ? 
xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); 8481 10406 } 8482 10407 … … 8485 10410 xpath_string s = string_value(c.n, stack.result); 8486 10411 8487 normalize_space(s.data(stack.result)); 8488 8489 return s; 10412 char_t* begin = s.data(stack.result); 10413 char_t* end = normalize_space(begin); 10414 10415 return xpath_string::from_heap_preallocated(begin, end); 8490 10416 } 8491 10417 … … 8494 10420 xpath_string s = _left->eval_string(c, stack); 8495 10421 8496 normalize_space(s.data(stack.result)); 10422 char_t* begin = s.data(stack.result); 10423 char_t* end = normalize_space(begin); 8497 10424 8498 return s;10425 return xpath_string::from_heap_preallocated(begin, end); 8499 10426 } 8500 10427 … … 8509 10436 xpath_string to = _right->_next->eval_string(c, swapped_stack); 8510 10437 8511 translate(s.data(stack.result), from.c_str(), to.c_str()); 8512 8513 return s; 10438 char_t* begin = s.data(stack.result); 10439 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); 10440 10441 return xpath_string::from_heap_preallocated(begin, end); 10442 } 10443 10444 case ast_opt_translate_table: 10445 { 10446 xpath_string s = _left->eval_string(c, stack); 10447 10448 char_t* begin = s.data(stack.result); 10449 char_t* end = translate_table(begin, _data.table); 10450 10451 return xpath_string::from_heap_preallocated(begin, end); 8514 10452 } 8515 10453 … … 8519 10457 8520 10458 if (_rettype == xpath_type_string) 8521 return xpath_string _const(_data.variable->get_string());10459 return xpath_string::from_const(_data.variable->get_string()); 8522 10460 8523 10461 // fallthrough to type conversion … … 8529 10467 { 8530 10468 case xpath_type_boolean: 8531 return xpath_string _const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));10469 return xpath_string::from_const(eval_boolean(c, stack) ? 
PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 8532 10470 8533 10471 case xpath_type_number: … … 8540 10478 xpath_stack swapped_stack = {stack.temp, stack.result}; 8541 10479 8542 xpath_node_set_raw ns = eval_node_set(c, swapped_stack );10480 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); 8543 10481 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); 8544 10482 } … … 8552 10490 } 8553 10491 8554 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack )10492 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) 8555 10493 { 8556 10494 switch (_type) … … 8562 10500 xpath_stack swapped_stack = {stack.temp, stack.result}; 8563 10501 8564 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack );8565 xpath_node_set_raw rs = _right->eval_node_set(c, stack );8566 10502 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); 10503 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); 10504 8567 10505 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother 8568 10506 rs.set_type(xpath_node_set::type_unsorted); … … 8570 10508 rs.append(ls.begin(), ls.end(), stack.result); 8571 10509 rs.remove_duplicates(); 8572 10510 8573 10511 return rs; 8574 10512 } 8575 10513 8576 10514 case ast_filter: 8577 case ast_filter_posinv: 8578 { 8579 xpath_node_set_raw set = _left->eval_node_set(c, stack); 10515 { 10516 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); 8580 10517 8581 10518 // either expression is a number or it contains position() call; sort by document order 8582 if (_type == ast_filter) set.sort_do(); 8583 8584 apply_predicate(set, 0, _right, stack); 10519 if (_test != predicate_posinv) set.sort_do(); 10520 10521 bool once = eval_once(set.type(), eval); 10522 10523 apply_predicate(set, 0, stack, once); 8585 10524 8586 10525 return set; … … 8595 10534 { 8596 10535 case axis_ancestor: 8597 return step_do(c, stack, axis_to_type<axis_ancestor>());10536 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); 8598 10537 8599 10538 case axis_ancestor_or_self: 8600 return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());10539 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); 8601 10540 8602 10541 case axis_attribute: 8603 return step_do(c, stack, axis_to_type<axis_attribute>());10542 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); 8604 10543 8605 10544 case axis_child: 8606 return step_do(c, stack, axis_to_type<axis_child>());10545 return step_do(c, stack, eval, axis_to_type<axis_child>()); 8607 10546 8608 10547 case axis_descendant: 8609 return step_do(c, stack, axis_to_type<axis_descendant>());10548 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); 8610 10549 8611 10550 case axis_descendant_or_self: 8612 return step_do(c, stack, axis_to_type<axis_descendant_or_self>());10551 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); 8613 10552 8614 10553 case axis_following: 8615 return step_do(c, stack, axis_to_type<axis_following>());10554 return step_do(c, stack, eval, axis_to_type<axis_following>()); 8616 10555 8617 10556 case axis_following_sibling: 8618 return step_do(c, stack, axis_to_type<axis_following_sibling>());10557 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); 8619 10558 8620 10559 case axis_namespace: … … 8623 10562 8624 10563 case axis_parent: 8625 return 
step_do(c, stack, axis_to_type<axis_parent>());10564 return step_do(c, stack, eval, axis_to_type<axis_parent>()); 8626 10565 8627 10566 case axis_preceding: 8628 return step_do(c, stack, axis_to_type<axis_preceding>());10567 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); 8629 10568 8630 10569 case axis_preceding_sibling: 8631 return step_do(c, stack, axis_to_type<axis_preceding_sibling>());10570 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); 8632 10571 8633 10572 case axis_self: 8634 return step_do(c, stack, axis_to_type<axis_self>());10573 return step_do(c, stack, eval, axis_to_type<axis_self>()); 8635 10574 8636 10575 default: … … 8678 10617 } 8679 10618 } 10619 10620 void optimize(xpath_allocator* alloc) 10621 { 10622 if (_left) _left->optimize(alloc); 10623 if (_right) _right->optimize(alloc); 10624 if (_next) _next->optimize(alloc); 10625 10626 optimize_self(alloc); 10627 } 10628 10629 void optimize_self(xpath_allocator* alloc) 10630 { 10631 // Rewrite [position()=expr] with [expr] 10632 // Note that this step has to go before classification to recognize [position()=1] 10633 if ((_type == ast_filter || _type == ast_predicate) && 10634 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) 10635 { 10636 _right = _right->_right; 10637 } 10638 10639 // Classify filter/predicate ops to perform various optimizations during evaluation 10640 if (_type == ast_filter || _type == ast_predicate) 10641 { 10642 assert(_test == predicate_default); 10643 10644 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) 10645 _test = predicate_constant_one; 10646 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) 10647 _test = predicate_constant; 10648 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) 10649 _test = 
predicate_posinv; 10650 } 10651 10652 // Rewrite descendant-or-self::node()/child::foo with descendant::foo 10653 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately 10654 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes 10655 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) 10656 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && 10657 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && 10658 is_posinv_step()) 10659 { 10660 if (_axis == axis_child || _axis == axis_descendant) 10661 _axis = axis_descendant; 10662 else 10663 _axis = axis_descendant_or_self; 10664 10665 _left = _left->_left; 10666 } 10667 10668 // Use optimized lookup table implementation for translate() with constant arguments 10669 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) 10670 { 10671 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); 10672 10673 if (table) 10674 { 10675 _type = ast_opt_translate_table; 10676 _data.table = table; 10677 } 10678 } 10679 10680 // Use optimized path for @attr = 'value' or @attr = $value 10681 if (_type == ast_op_equal && 10682 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && 10683 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) 10684 { 10685 _type = ast_opt_compare_attribute; 10686 } 10687 } 8680 10688 8681 bool is_posinv ()10689 bool is_posinv_expr() const 8682 10690 { 8683 10691 switch (_type) 8684 10692 { 8685 10693 case ast_func_position: 10694 case ast_func_last: 8686 10695 return false; 8687 
10696 … … 8697 10706 case ast_predicate: 8698 10707 case ast_filter: 8699 case ast_filter_posinv:8700 10708 return true; 8701 10709 8702 10710 default: 8703 if (_left && !_left->is_posinv ()) return false;10711 if (_left && !_left->is_posinv_expr()) return false; 8704 10712 8705 10713 for (xpath_ast_node* n = _right; n; n = n->_next) 8706 if (!n->is_posinv ()) return false;10714 if (!n->is_posinv_expr()) return false; 8707 10715 8708 10716 return true; … … 8710 10718 } 8711 10719 10720 bool is_posinv_step() const 10721 { 10722 assert(_type == ast_step); 10723 10724 for (xpath_ast_node* n = _right; n; n = n->_next) 10725 { 10726 assert(n->_type == ast_predicate); 10727 10728 if (n->_test != predicate_posinv) 10729 return false; 10730 } 10731 10732 return true; 10733 } 10734 8712 10735 xpath_value_type rettype() const 8713 10736 { … … 8725 10748 8726 10749 xpath_parse_result* _result; 10750 10751 char_t _scratch[32]; 8727 10752 8728 10753 #ifdef PUGIXML_NO_EXCEPTIONS … … 8768 10793 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); 8769 10794 if (!c) throw_error_oom(); 10795 assert(c); // workaround for clang static analysis 8770 10796 8771 10797 memcpy(c, value.begin, length * sizeof(char_t)); … … 8803 10829 } 8804 10830 else if (name == PUGIXML_TEXT("contains") && argc == 2) 8805 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_ string, args[0], args[1]);10831 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); 8806 10832 else if (name == PUGIXML_TEXT("concat") && argc >= 2) 8807 10833 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); … … 8865 10891 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); 8866 10892 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) 8867 return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_ string, args[0]);10893 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); 8868 10894 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) 8869 10895 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); … … 9014 11040 throw_error("Unknown variable: variable set is not provided"); 9015 11041 9016 xpath_variable* var = get_variable(_variables, name.begin, name.end); 11042 xpath_variable* var = 0; 11043 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) 11044 throw_error_oom(); 9017 11045 9018 11046 if (!var) … … 9052 11080 double value = 0; 9053 11081 9054 if (!convert_string_to_number (_lexer.contents().begin, _lexer.contents().end, &value))11082 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) 9055 11083 throw_error_oom(); 9056 11084 … … 9120 11148 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); 9121 11149 9122 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv(); 9123 9124 n = new (alloc_node()) xpath_ast_node(posinv ? 
ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr); 11150 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); 9125 11151 9126 11152 if (_lexer.current() != lex_close_square_brace) … … 9266 11292 xpath_ast_node* expr = parse_expression(); 9267 11293 9268 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);11294 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); 9269 11295 9270 11296 if (_lexer.current() != lex_close_square_brace) … … 9277 11303 last = pred; 9278 11304 } 9279 11305 9280 11306 return n; 9281 11307 } … … 9336 11362 // | FilterExpr '/' RelativeLocationPath 9337 11363 // | FilterExpr '//' RelativeLocationPath 9338 xpath_ast_node* parse_path_expression() 11364 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 11365 // UnaryExpr ::= UnionExpr | '-' UnaryExpr 11366 xpath_ast_node* parse_path_or_unary_expression() 9339 11367 { 9340 11368 // Clarification. 
… … 9382 11410 return n; 9383 11411 } 9384 else return parse_location_path(); 9385 } 9386 9387 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 9388 xpath_ast_node* parse_union_expression() 9389 { 9390 xpath_ast_node* n = parse_path_expression(); 9391 9392 while (_lexer.current() == lex_union) 11412 else if (_lexer.current() == lex_minus) 9393 11413 { 9394 11414 _lexer.next(); 9395 11415 9396 xpath_ast_node* expr = parse_union_expression(); 9397 9398 if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set) 11416 // precedence 7+ - only parses union expressions 11417 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); 11418 11419 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); 11420 } 11421 else 11422 return parse_location_path(); 11423 } 11424 11425 struct binary_op_t 11426 { 11427 ast_type_t asttype; 11428 xpath_value_type rettype; 11429 int precedence; 11430 11431 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11432 { 11433 } 11434 11435 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11436 { 11437 } 11438 11439 static binary_op_t parse(xpath_lexer& lexer) 11440 { 11441 switch (lexer.current()) 11442 { 11443 case lex_string: 11444 if (lexer.contents() == PUGIXML_TEXT("or")) 11445 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11446 else if (lexer.contents() == PUGIXML_TEXT("and")) 11447 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11448 else if (lexer.contents() == PUGIXML_TEXT("div")) 11449 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11450 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11451 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11452 else 11453 return binary_op_t(); 11454 11455 case lex_equal: 11456 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11457 11458 case lex_not_equal: 11459 return 
binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11460 11461 case lex_less: 11462 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11463 11464 case lex_greater: 11465 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11466 11467 case lex_less_or_equal: 11468 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 11469 11470 case lex_greater_or_equal: 11471 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 11472 11473 case lex_plus: 11474 return binary_op_t(ast_op_add, xpath_type_number, 5); 11475 11476 case lex_minus: 11477 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 11478 11479 case lex_multiply: 11480 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 11481 11482 case lex_union: 11483 return binary_op_t(ast_op_union, xpath_type_node_set, 7); 11484 11485 default: 11486 return binary_op_t(); 11487 } 11488 } 11489 }; 11490 11491 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) 11492 { 11493 binary_op_t op = binary_op_t::parse(_lexer); 11494 11495 while (op.asttype != ast_unknown && op.precedence >= limit) 11496 { 11497 _lexer.next(); 11498 11499 xpath_ast_node* rhs = parse_path_or_unary_expression(); 11500 11501 binary_op_t nextop = binary_op_t::parse(_lexer); 11502 11503 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 11504 { 11505 rhs = parse_expression_rec(rhs, nextop.precedence); 11506 11507 nextop = binary_op_t::parse(_lexer); 11508 } 11509 11510 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 9399 11511 throw_error("Union operator has to be applied to node sets"); 9400 11512 9401 n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr); 9402 } 9403 9404 return n; 9405 } 9406 9407 // UnaryExpr ::= UnionExpr | '-' UnaryExpr 9408 xpath_ast_node* parse_unary_expression() 9409 { 9410 if (_lexer.current() == lex_minus) 9411 { 9412 _lexer.next(); 9413 9414 
xpath_ast_node* expr = parse_unary_expression(); 9415 9416 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); 9417 } 9418 else return parse_union_expression(); 9419 } 9420 9421 // MultiplicativeExpr ::= UnaryExpr 9422 // | MultiplicativeExpr '*' UnaryExpr 9423 // | MultiplicativeExpr 'div' UnaryExpr 9424 // | MultiplicativeExpr 'mod' UnaryExpr 9425 xpath_ast_node* parse_multiplicative_expression() 9426 { 9427 xpath_ast_node* n = parse_unary_expression(); 9428 9429 while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string && 9430 (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div")))) 9431 { 9432 ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply : 9433 _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod; 9434 _lexer.next(); 9435 9436 xpath_ast_node* expr = parse_unary_expression(); 9437 9438 n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr); 9439 } 9440 9441 return n; 9442 } 9443 9444 // AdditiveExpr ::= MultiplicativeExpr 9445 // | AdditiveExpr '+' MultiplicativeExpr 9446 // | AdditiveExpr '-' MultiplicativeExpr 9447 xpath_ast_node* parse_additive_expression() 9448 { 9449 xpath_ast_node* n = parse_multiplicative_expression(); 9450 9451 while (_lexer.current() == lex_plus || _lexer.current() == lex_minus) 9452 { 9453 lexeme_t l = _lexer.current(); 9454 9455 _lexer.next(); 9456 9457 xpath_ast_node* expr = parse_multiplicative_expression(); 9458 9459 n = new (alloc_node()) xpath_ast_node(l == lex_plus ? 
ast_op_add : ast_op_subtract, xpath_type_number, n, expr); 9460 } 9461 9462 return n; 9463 } 9464 11513 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); 11514 11515 op = binary_op_t::parse(_lexer); 11516 } 11517 11518 return lhs; 11519 } 11520 11521 // Expr ::= OrExpr 11522 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 11523 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 11524 // EqualityExpr ::= RelationalExpr 11525 // | EqualityExpr '=' RelationalExpr 11526 // | EqualityExpr '!=' RelationalExpr 9465 11527 // RelationalExpr ::= AdditiveExpr 9466 11528 // | RelationalExpr '<' AdditiveExpr … … 9468 11530 // | RelationalExpr '<=' AdditiveExpr 9469 11531 // | RelationalExpr '>=' AdditiveExpr 9470 xpath_ast_node* parse_relational_expression() 9471 { 9472 xpath_ast_node* n = parse_additive_expression(); 9473 9474 while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal || 9475 _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal) 9476 { 9477 lexeme_t l = _lexer.current(); 9478 _lexer.next(); 9479 9480 xpath_ast_node* expr = parse_additive_expression(); 9481 9482 n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater : 9483 l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr); 9484 } 9485 9486 return n; 9487 } 9488 9489 // EqualityExpr ::= RelationalExpr 9490 // | EqualityExpr '=' RelationalExpr 9491 // | EqualityExpr '!=' RelationalExpr 9492 xpath_ast_node* parse_equality_expression() 9493 { 9494 xpath_ast_node* n = parse_relational_expression(); 9495 9496 while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal) 9497 { 9498 lexeme_t l = _lexer.current(); 9499 9500 _lexer.next(); 9501 9502 xpath_ast_node* expr = parse_relational_expression(); 9503 9504 n = new (alloc_node()) xpath_ast_node(l == lex_equal ? 
ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr); 9505 } 9506 9507 return n; 9508 } 9509 9510 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 9511 xpath_ast_node* parse_and_expression() 9512 { 9513 xpath_ast_node* n = parse_equality_expression(); 9514 9515 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and")) 9516 { 9517 _lexer.next(); 9518 9519 xpath_ast_node* expr = parse_equality_expression(); 9520 9521 n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr); 9522 } 9523 9524 return n; 9525 } 9526 9527 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 9528 xpath_ast_node* parse_or_expression() 9529 { 9530 xpath_ast_node* n = parse_and_expression(); 9531 9532 while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or")) 9533 { 9534 _lexer.next(); 9535 9536 xpath_ast_node* expr = parse_and_expression(); 9537 9538 n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr); 9539 } 9540 9541 return n; 9542 } 9543 9544 // Expr ::= OrExpr 11532 // AdditiveExpr ::= MultiplicativeExpr 11533 // | AdditiveExpr '+' MultiplicativeExpr 11534 // | AdditiveExpr '-' MultiplicativeExpr 11535 // MultiplicativeExpr ::= UnaryExpr 11536 // | MultiplicativeExpr '*' UnaryExpr 11537 // | MultiplicativeExpr 'div' UnaryExpr 11538 // | MultiplicativeExpr 'mod' UnaryExpr 9545 11539 xpath_ast_node* parse_expression() 9546 11540 { 9547 return parse_ or_expression();11541 return parse_expression_rec(parse_path_or_unary_expression(), 0); 9548 11542 } 9549 11543 … … 9584 11578 { 9585 11579 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 11580 if (!memory) return 0; 9586 11581 9587 11582 return new (memory) xpath_query_impl(); 9588 11583 } 9589 11584 9590 static void destroy(void* ptr) 9591 { 9592 if (!ptr) return; 9593 11585 static void destroy(xpath_query_impl* impl) 11586 { 9594 11587 // free all allocated pages 9595 
static_cast<xpath_query_impl*>(ptr)->alloc.release();11588 impl->alloc.release(); 9596 11589 9597 11590 // free allocator memory (with the first page) 9598 xml_memory::deallocate( ptr);11591 xml_memory::deallocate(impl); 9599 11592 } 9600 11593 … … 9602 11595 { 9603 11596 block.next = 0; 11597 block.capacity = sizeof(block.data); 9604 11598 } 9605 11599 … … 9621 11615 return impl->root->eval_string(c, sd.stack); 9622 11616 } 11617 11618 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) 11619 { 11620 if (!impl) return 0; 11621 11622 if (impl->root->rettype() != xpath_type_node_set) 11623 { 11624 #ifdef PUGIXML_NO_EXCEPTIONS 11625 return 0; 11626 #else 11627 xpath_parse_result res; 11628 res.error = "Expression does not evaluate to node set"; 11629 11630 throw xpath_exception(res); 11631 #endif 11632 } 11633 11634 return impl->root; 11635 } 9623 11636 PUGI__NS_END 9624 11637 … … 9705 11718 #endif 9706 11719 9707 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_ )11720 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 9708 11721 { 9709 11722 assert(begin_ <= end_); … … 9721 11734 _begin = &_storage; 9722 11735 _end = &_storage + size_; 11736 _type = type_; 9723 11737 } 9724 11738 else … … 9744 11758 _begin = storage; 9745 11759 _end = storage + size_; 9746 } 9747 } 11760 _type = type_; 11761 } 11762 } 11763 11764 #if __cplusplus >= 201103 11765 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) 11766 { 11767 _type = rhs._type; 11768 _storage = rhs._storage; 11769 _begin = (rhs._begin == &rhs._storage) ? 
&_storage : rhs._begin; 11770 _end = _begin + (rhs._end - rhs._begin); 11771 11772 rhs._type = type_unsorted; 11773 rhs._begin = &rhs._storage; 11774 rhs._end = rhs._begin; 11775 } 11776 #endif 9748 11777 9749 11778 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) … … 9751 11780 } 9752 11781 9753 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_ ), _begin(&_storage), _end(&_storage)9754 { 9755 _assign(begin_, end_ );11782 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11783 { 11784 _assign(begin_, end_, type_); 9756 11785 } 9757 11786 9758 11787 PUGI__FN xpath_node_set::~xpath_node_set() 9759 11788 { 9760 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 11789 if (_begin != &_storage) 11790 impl::xml_memory::deallocate(_begin); 9761 11791 } 9762 11792 9763 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type( ns._type), _begin(&_storage), _end(&_storage)9764 { 9765 _assign(ns._begin, ns._end );11793 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11794 { 11795 _assign(ns._begin, ns._end, ns._type); 9766 11796 } 9767 11797 … … 9769 11799 { 9770 11800 if (this == &ns) return *this; 9771 9772 _type = ns._type; 9773 _assign(ns._begin, ns._end); 11801 11802 _assign(ns._begin, ns._end, ns._type); 9774 11803 9775 11804 return *this; 9776 11805 } 11806 11807 #if __cplusplus >= 201103 11808 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11809 { 11810 _move(rhs); 11811 } 11812 11813 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) 11814 { 11815 if (this == &rhs) return *this; 11816 11817 if (_begin != &_storage) 11818 impl::xml_memory::deallocate(_begin); 
11819 11820 _move(rhs); 11821 11822 return *this; 11823 } 11824 #endif 9777 11825 9778 11826 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const … … 9831 11879 } 9832 11880 9833 PUGI__FN xpath_variable::xpath_variable( )11881 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) 9834 11882 { 9835 11883 } … … 9930 11978 PUGI__FN xpath_variable_set::xpath_variable_set() 9931 11979 { 9932 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0; 11980 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 11981 _data[i] = 0; 9933 11982 } 9934 11983 … … 9936 11985 { 9937 11986 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 9938 { 9939 xpath_variable* var = _data[i]; 9940 9941 while (var) 9942 { 9943 xpath_variable* next = var->_next; 9944 9945 impl::delete_xpath_variable(var->_type, var); 9946 9947 var = next; 9948 } 9949 } 9950 } 9951 9952 PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const 11987 _destroy(_data[i]); 11988 } 11989 11990 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) 11991 { 11992 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 11993 _data[i] = 0; 11994 11995 _assign(rhs); 11996 } 11997 11998 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) 11999 { 12000 if (this == &rhs) return *this; 12001 12002 _assign(rhs); 12003 12004 return *this; 12005 } 12006 12007 #if __cplusplus >= 201103 12008 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) 12009 { 12010 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12011 { 12012 _data[i] = rhs._data[i]; 12013 rhs._data[i] = 0; 12014 } 12015 } 12016 12017 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) 12018 { 12019 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12020 { 12021 _destroy(_data[i]); 12022 12023 _data[i] = rhs._data[i]; 12024 
rhs._data[i] = 0; 12025 } 12026 12027 return *this; 12028 } 12029 #endif 12030 12031 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) 12032 { 12033 xpath_variable_set temp; 12034 12035 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12036 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) 12037 return; 12038 12039 _swap(temp); 12040 } 12041 12042 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) 12043 { 12044 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12045 { 12046 xpath_variable* chain = _data[i]; 12047 12048 _data[i] = rhs._data[i]; 12049 rhs._data[i] = chain; 12050 } 12051 } 12052 12053 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const 9953 12054 { 9954 12055 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); … … 9963 12064 } 9964 12065 12066 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12067 { 12068 xpath_variable* last = 0; 12069 12070 while (var) 12071 { 12072 // allocate storage for new variable 12073 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12074 if (!nvar) return false; 12075 12076 // link the variable to the result immediately to handle failures gracefully 12077 if (last) 12078 last->_next = nvar; 12079 else 12080 *out_result = nvar; 12081 12082 last = nvar; 12083 12084 // copy the value; this can fail due to out-of-memory conditions 12085 if (!impl::copy_xpath_variable(nvar, var)) return false; 12086 12087 var = var->_next; 12088 } 12089 12090 return true; 12091 } 12092 12093 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12094 { 12095 while (var) 12096 { 12097 xpath_variable* next = var->_next; 12098 12099 impl::delete_xpath_variable(var->_type, var); 12100 12101 var = next; 12102 } 12103 } 12104 9965 12105 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 9966 12106 { … … 9978 12118 if (result) 9979 
12119 { 9980 result->_type = type;9981 12120 result->_next = _data[hash]; 9982 12121 … … 10013 12152 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 10014 12153 { 10015 return find(name);12154 return _find(name); 10016 12155 } 10017 12156 10018 12157 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 10019 12158 { 10020 return find(name);12159 return _find(name); 10021 12160 } 10022 12161 … … 10035 12174 else 10036 12175 { 10037 impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy); 12176 using impl::auto_deleter; // MSVC7 workaround 12177 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 10038 12178 10039 12179 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); … … 10041 12181 if (qimpl->root) 10042 12182 { 10043 _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release()); 12183 qimpl->root->optimize(&qimpl->alloc); 12184 12185 _impl = impl.release(); 10044 12186 _result.error = 0; 10045 12187 } … … 10047 12189 } 10048 12190 12191 PUGI__FN xpath_query::xpath_query(): _impl(0) 12192 { 12193 } 12194 10049 12195 PUGI__FN xpath_query::~xpath_query() 10050 12196 { 10051 impl::xpath_query_impl::destroy(_impl); 10052 } 12197 if (_impl) 12198 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12199 } 12200 12201 #if __cplusplus >= 201103 12202 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) 12203 { 12204 _impl = rhs._impl; 12205 _result = rhs._result; 12206 rhs._impl = 0; 12207 rhs._result = xpath_parse_result(); 12208 } 12209 12210 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) 12211 { 12212 if (this == &rhs) return *this; 12213 12214 if (_impl) 12215 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12216 12217 _impl = rhs._impl; 12218 _result = rhs._result; 12219 rhs._impl = 0; 12220 rhs._result = xpath_parse_result(); 12221 12222 return *this; 12223 } 
12224 #endif 10053 12225 10054 12226 PUGI__FN xpath_value_type xpath_query::return_type() const … … 10092 12264 impl::xpath_stack_data sd; 10093 12265 10094 return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str(); 12266 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); 12267 12268 return string_t(r.c_str(), r.length()); 10095 12269 } 10096 12270 #endif … … 10118 12292 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 10119 12293 { 10120 if (!_impl) return xpath_node_set(); 10121 10122 impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root; 10123 10124 if (root->rettype() != xpath_type_node_set) 10125 { 10126 #ifdef PUGIXML_NO_EXCEPTIONS 10127 return xpath_node_set(); 10128 #else 10129 xpath_parse_result res; 10130 res.error = "Expression does not evaluate to node set"; 10131 10132 throw xpath_exception(res); 10133 #endif 10134 } 10135 12294 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12295 if (!root) return xpath_node_set(); 12296 10136 12297 impl::xpath_context c(n, 1, 1); 10137 12298 impl::xpath_stack_data sd; … … 10141 12302 #endif 10142 12303 10143 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack );12304 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 10144 12305 10145 12306 return xpath_node_set(r.begin(), r.end(), r.type()); 10146 12307 } 10147 12308 12309 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12310 { 12311 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12312 if (!root) return xpath_node(); 12313 12314 impl::xpath_context c(n, 1, 1); 12315 impl::xpath_stack_data sd; 12316 12317 #ifdef PUGIXML_NO_EXCEPTIONS 12318 if (setjmp(sd.error_handler)) return xpath_node(); 12319 #endif 12320 12321 
impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12322 12323 return r.first(); 12324 } 12325 10148 12326 PUGI__FN const xpath_parse_result& xpath_query::result() const 10149 12327 { … … 10163 12341 { 10164 12342 return !_impl; 12343 } 12344 12345 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12346 { 12347 xpath_query q(query, variables); 12348 return select_node(q); 12349 } 12350 12351 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12352 { 12353 return query.evaluate_node(*this); 12354 } 12355 12356 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12357 { 12358 xpath_query q(query, variables); 12359 return select_nodes(q); 12360 } 12361 12362 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12363 { 12364 return query.evaluate_node_set(*this); 10165 12365 } 10166 12366 … … 10173 12373 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 10174 12374 { 10175 xpath_node_set s = query.evaluate_node_set(*this); 10176 return s.empty() ? 
xpath_node() : s.first(); 10177 } 10178 10179 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 10180 { 10181 xpath_query q(query, variables); 10182 return select_nodes(q); 10183 } 10184 10185 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 10186 { 10187 return query.evaluate_node_set(*this); 12375 return query.evaluate_node(*this); 10188 12376 } 10189 12377 } … … 10203 12391 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 10204 12392 #undef PUGI__NO_INLINE 12393 #undef PUGI__UNLIKELY 10205 12394 #undef PUGI__STATIC_ASSERT 10206 12395 #undef PUGI__DMC_VOLATILE … … 10210 12399 #undef PUGI__FN 10211 12400 #undef PUGI__FN_NO_INLINE 12401 #undef PUGI__GETPAGE_IMPL 12402 #undef PUGI__GETPAGE 12403 #undef PUGI__NODETYPE 10212 12404 #undef PUGI__IS_CHARTYPE_IMPL 10213 12405 #undef PUGI__IS_CHARTYPE 10214 12406 #undef PUGI__IS_CHARTYPEX 12407 #undef PUGI__ENDSWITH 10215 12408 #undef PUGI__SKIPWS 10216 12409 #undef PUGI__OPTSET … … 10219 12412 #undef PUGI__SCANFOR 10220 12413 #undef PUGI__SCANWHILE 12414 #undef PUGI__SCANWHILE_UNROLL 10221 12415 #undef PUGI__ENDSEG 10222 12416 #undef PUGI__THROW_ERROR … … 10226 12420 10227 12421 /** 10228 * Copyright (c) 2006-201 2Arseny Kapoulkine12422 * Copyright (c) 2006-2015 Arseny Kapoulkine 10229 12423 * 10230 12424 * Permission is hereby granted, free of charge, to any person -
XmlTools2/trunk/libs/pugixml.hpp
r906 r1055 1 1 /** 2 * pugixml parser - version 1. 22 * pugixml parser - version 1.7 3 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-201 2, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)4 * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 5 * Report bugs and download new versions at http://pugixml.org/ 6 6 * … … 14 14 #ifndef PUGIXML_VERSION 15 15 // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons 16 # define PUGIXML_VERSION 1 2016 # define PUGIXML_VERSION 170 17 17 #endif 18 18 … … 64 64 #endif 65 65 66 // If the platform is known to have long long support, enable long long functions 67 #ifndef PUGIXML_HAS_LONG_LONG 68 # if __cplusplus >= 201103 69 # define PUGIXML_HAS_LONG_LONG 70 # elif defined(_MSC_VER) && _MSC_VER >= 1400 71 # define PUGIXML_HAS_LONG_LONG 72 # endif 73 #endif 74 66 75 // Character interface macros 67 76 #ifdef PUGIXML_WCHAR_MODE … … 142 151 // This flag is off by default; turning it on may result in slower parsing and more memory consumption. 143 152 const unsigned int parse_ws_pcdata_single = 0x0400; 153 154 // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default. 155 const unsigned int parse_trim_pcdata = 0x0800; 156 157 // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document 158 // is a valid document. This flag is off by default. 159 const unsigned int parse_fragment = 0x1000; 144 160 145 161 // The default parsing mode. … … 188 204 const unsigned int format_save_file_text = 0x20; 189 205 206 // Write every attribute on a new line with appropriate indentation. This flag is off by default. 207 const unsigned int format_indent_attributes = 0x40; 208 190 209 // The default set of formatting flags. 
191 210 // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. 192 211 const unsigned int format_default = format_indent; 193 212 194 213 // Forward declarations 195 214 struct xml_attribute_struct; … … 201 220 202 221 class xml_tree_walker; 222 223 struct xml_parse_result; 203 224 204 225 class xml_node; … … 218 239 public: 219 240 typedef It const_iterator; 241 typedef It iterator; 220 242 221 243 xml_object_range(It b, It e): _begin(b), _end(e) … … 318 340 float as_float(float def = 0) const; 319 341 342 #ifdef PUGIXML_HAS_LONG_LONG 343 long long as_llong(long long def = 0) const; 344 unsigned long long as_ullong(unsigned long long def = 0) const; 345 #endif 346 320 347 // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty 321 348 bool as_bool(bool def = false) const; … … 329 356 bool set_value(unsigned int rhs); 330 357 bool set_value(double rhs); 358 bool set_value(float rhs); 331 359 bool set_value(bool rhs); 360 361 #ifdef PUGIXML_HAS_LONG_LONG 362 bool set_value(long long rhs); 363 bool set_value(unsigned long long rhs); 364 #endif 332 365 333 366 // Set attribute value (equivalent to set_value without error checking) … … 336 369 xml_attribute& operator=(unsigned int rhs); 337 370 xml_attribute& operator=(double rhs); 371 xml_attribute& operator=(float rhs); 338 372 xml_attribute& operator=(bool rhs); 373 374 #ifdef PUGIXML_HAS_LONG_LONG 375 xml_attribute& operator=(long long rhs); 376 xml_attribute& operator=(unsigned long long rhs); 377 #endif 339 378 340 379 // Get next/previous attribute in the attribute list of the parent node … … 394 433 xml_node_type type() const; 395 434 396 // Get node name /value, or "" if node is empty or it has no name/value435 // Get node name, or "" if node is empty or it has no name 397 436 const char_t* name() const; 437 438 // Get node value, or "" if node is empty or it has no value 439 // 
Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes. 398 440 const char_t* value() const; 399 441 … … 425 467 xml_node previous_sibling(const char_t* name) const; 426 468 469 // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast) 470 xml_attribute attribute(const char_t* name, xml_attribute& hint) const; 471 427 472 // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA 428 473 const char_t* child_value() const; … … 465 510 xml_node insert_copy_before(const xml_node& proto, const xml_node& node); 466 511 512 // Move the specified node to become a child of this node. Returns moved node, or empty node on errors. 513 xml_node append_move(const xml_node& moved); 514 xml_node prepend_move(const xml_node& moved); 515 xml_node insert_move_after(const xml_node& moved, const xml_node& node); 516 xml_node insert_move_before(const xml_node& moved, const xml_node& node); 517 467 518 // Remove specified attribute 468 519 bool remove_attribute(const xml_attribute& a); … … 472 523 bool remove_child(const xml_node& n); 473 524 bool remove_child(const char_t* name); 525 526 // Parses buffer as an XML document fragment and appends all nodes as children of the current node. 527 // Copies/converts the buffer, so it may be deleted or changed after the function returns. 528 // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory. 529 xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); 474 530 475 531 // Find attribute using predicate. Returns first attribute for which predicate returned true. … … 538 594 #ifndef PUGIXML_NO_XPATH 539 595 // Select single node by evaluating XPath query. 
Returns first node from the resulting node set. 540 xpath_node select_ single_node(const char_t* query, xpath_variable_set* variables = 0) const;541 xpath_node select_ single_node(const xpath_query& query) const;596 xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; 597 xpath_node select_node(const xpath_query& query) const; 542 598 543 599 // Select node set by evaluating XPath query 544 600 xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; 545 601 xpath_node_set select_nodes(const xpath_query& query) const; 602 603 // (deprecated: use select_node instead) Select single node by evaluating XPath query. 604 xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; 605 xpath_node select_single_node(const xpath_query& query) const; 606 546 607 #endif 547 608 … … 627 688 float as_float(float def = 0) const; 628 689 690 #ifdef PUGIXML_HAS_LONG_LONG 691 long long as_llong(long long def = 0) const; 692 unsigned long long as_ullong(unsigned long long def = 0) const; 693 #endif 694 629 695 // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty 630 696 bool as_bool(bool def = false) const; … … 637 703 bool set(unsigned int rhs); 638 704 bool set(double rhs); 705 bool set(float rhs); 639 706 bool set(bool rhs); 707 708 #ifdef PUGIXML_HAS_LONG_LONG 709 bool set(long long rhs); 710 bool set(unsigned long long rhs); 711 #endif 640 712 641 713 // Set text (equivalent to set without error checking) … … 644 716 xml_text& operator=(unsigned int rhs); 645 717 xml_text& operator=(double rhs); 718 xml_text& operator=(float rhs); 646 719 xml_text& operator=(bool rhs); 720 721 #ifdef PUGIXML_HAS_LONG_LONG 722 xml_text& operator=(long long rhs); 723 xml_text& operator=(unsigned long long rhs); 724 #endif 647 725 648 726 // Get the data node (node_pcdata or node_cdata) for this object … … 741 819 742 820 // Named node range 
helper 743 class xml_named_node_iterator 744 { 821 class PUGIXML_CLASS xml_named_node_iterator 822 { 823 friend class xml_node; 824 745 825 public: 746 826 // Iterator traits … … 751 831 752 832 #ifndef PUGIXML_NO_STL 753 typedef std:: forward_iterator_tag iterator_category;833 typedef std::bidirectional_iterator_tag iterator_category; 754 834 #endif 755 835 … … 770 850 xml_named_node_iterator operator++(int); 771 851 852 const xml_named_node_iterator& operator--(); 853 xml_named_node_iterator operator--(int); 854 772 855 private: 773 mutable xml_node _node; 856 mutable xml_node _wrap; 857 xml_node _parent; 774 858 const char_t* _name; 859 860 xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name); 775 861 }; 776 862 … … 821 907 status_bad_attribute, // Parsing error occurred while parsing element attribute 822 908 status_bad_end_element, // Parsing error occurred while parsing end element tag 823 status_end_element_mismatch // There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) 909 status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) 910 911 status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer) 912 913 status_no_document_element // Parsing resulted in a document without element nodes 824 914 }; 825 915 … … 856 946 // Non-copyable semantics 857 947 xml_document(const xml_document&); 858 constxml_document& operator=(const xml_document&);948 xml_document& operator=(const xml_document&); 859 949 860 950 void create(); 861 951 void destroy(); 862 863 xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own);864 952 865 953 public: … … 882 970 #endif 883 971 972 // 
(deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. 973 xml_parse_result load(const char_t* contents, unsigned int options = parse_default); 974 884 975 // Load document from zero-terminated string. No encoding conversions are applied. 885 xml_parse_result load (const char_t* contents, unsigned int options = parse_default);976 xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); 886 977 887 978 // Load document from file … … 956 1047 xpath_variable* _next; 957 1048 958 xpath_variable( );1049 xpath_variable(xpath_value_type type); 959 1050 960 1051 // Non-copyable semantics … … 988 1079 xpath_variable* _data[64]; 989 1080 990 // Non-copyable semantics 991 xpath_variable_set(const xpath_variable_set&); 992 xpath_variable_set& operator=(const xpath_variable_set&); 993 994 xpath_variable* find(const char_t* name) const; 1081 void _assign(const xpath_variable_set& rhs); 1082 void _swap(xpath_variable_set& rhs); 1083 1084 xpath_variable* _find(const char_t* name) const; 1085 1086 static bool _clone(xpath_variable* var, xpath_variable** out_result); 1087 static void _destroy(xpath_variable* var); 995 1088 996 1089 public: … … 998 1091 xpath_variable_set(); 999 1092 ~xpath_variable_set(); 1093 1094 // Copy constructor/assignment operator 1095 xpath_variable_set(const xpath_variable_set& rhs); 1096 xpath_variable_set& operator=(const xpath_variable_set& rhs); 1097 1098 #if __cplusplus >= 201103 1099 // Move semantics support 1100 xpath_variable_set(xpath_variable_set&& rhs); 1101 xpath_variable_set& operator=(xpath_variable_set&& rhs); 1102 #endif 1000 1103 1001 1104 // Add a new variable or get the existing one, if the types match … … 1031 1134 explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0); 1032 1135 1136 // Constructor 1137 xpath_query(); 1138 1033 1139 // Destructor 1034 1140 ~xpath_query(); 1141 1142 #if __cplusplus >= 201103 1143 // 
Move semantics support 1144 xpath_query(xpath_query&& rhs); 1145 xpath_query& operator=(xpath_query&& rhs); 1146 #endif 1035 1147 1036 1148 // Get query expression return type … … 1061 1173 // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead. 1062 1174 xpath_node_set evaluate_node_set(const xpath_node& n) const; 1175 1176 // Evaluate expression as node set in the specified context. 1177 // Return first node in document order, or empty node if node set is empty. 1178 // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. 1179 // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead. 1180 xpath_node evaluate_node(const xpath_node& n) const; 1063 1181 1064 1182 // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode) … … 1146 1264 // Constant iterator type 1147 1265 typedef const xpath_node* const_iterator; 1266 1267 // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work 1268 typedef const xpath_node* iterator; 1148 1269 1149 1270 // Default constructor. Constructs empty set. 
… … 1160 1281 xpath_node_set& operator=(const xpath_node_set& ns); 1161 1282 1283 #if __cplusplus >= 201103 1284 // Move semantics support 1285 xpath_node_set(xpath_node_set&& rhs); 1286 xpath_node_set& operator=(xpath_node_set&& rhs); 1287 #endif 1288 1162 1289 // Get collection type 1163 1290 type_t type() const; … … 1190 1317 xpath_node* _end; 1191 1318 1192 void _assign(const_iterator begin, const_iterator end); 1319 void _assign(const_iterator begin, const_iterator end, type_t type); 1320 void _move(xpath_node_set& rhs); 1193 1321 }; 1194 1322 #endif … … 1224 1352 std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&); 1225 1353 std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&); 1226 std:: forward_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);1354 std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&); 1227 1355 } 1228 1356 #endif … … 1234 1362 std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&); 1235 1363 std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&); 1236 std:: forward_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);1364 std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&); 1237 1365 } 1238 1366 #endif … … 1240 1368 #endif 1241 1369 1370 // Make sure implementation is included in header-only mode 1371 // Use macro expansion in #include to work around QMake (QTBUG-11923) 1372 #if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE) 1373 # define PUGIXML_SOURCE "pugixml.cpp" 1374 # include PUGIXML_SOURCE 1375 #endif 1376 1242 1377 /** 1243 * Copyright (c) 2006-201 2Arseny Kapoulkine1378 * Copyright (c) 2006-2015 Arseny Kapoulkine 1244 1379 * 1245 1380 * Permission is hereby granted, free of charge, to 
any person
Note:
See TracChangeset
for help on using the changeset viewer.