attribute 'first' of <file> must be 'true' or 'false'
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #include "pch.h"
4
5 #ifdef WIN32
6 #include <direct.h>
7 #include <io.h>
8 #else
9 #include <sys/stat.h>
10 #define _MAX_PATH 255
11 #endif
12 #include <assert.h>
13
14 #include "XML.h"
15 #include "exception.h"
16 #include "ssprintf.h"
17
18 using std::string;
19 using std::vector;
20
21 #ifdef WIN32
22 #define getcwd _getcwd
23 #endif//WIN32
24
// Characters skipped as whitespace by the tokenizer and the tag parser.
static const char* WS = " \t\r\n";
// Whitespace plus '=': the characters that end an attribute name.
static const char* WSEQ = " =\t\r\n";

// Current working directory; filled in by InitWorkingDirectory().
string working_directory;
29
30 class XMLInclude
31 {
32 public:
33 XMLElement *e;
34 Path path;
35
36 XMLInclude ( XMLElement* e_, const Path& path_ )
37 : e(e_), path(path_)
38 {
39 }
40 };
41
42 class XMLIncludes : public vector<XMLInclude*>
43 {
44 public:
45 ~XMLIncludes()
46 {
47 for ( size_t i = 0; i < this->size(); i++ )
48 delete (*this)[i];
49 }
50 };
51
52 void
53 InitWorkingDirectory()
54 {
55 // store the current directory for path calculations
56 working_directory.resize ( _MAX_PATH );
57 working_directory[0] = 0;
58 getcwd ( &working_directory[0], working_directory.size() );
59 working_directory.resize ( strlen ( working_directory.c_str() ) );
60 }
61
// Returns the length in bytes of the file behind the stream f.
// In the non-WIN32 build a failing fstat64() is reported as length 0.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	const int fd = _fileno ( f );
	return _filelengthi64 ( fd );
#else
	struct stat64 info;
	const int fd = fileno ( f );
	if ( fstat64 ( fd, &info ) == 0 )
		return info.st_size;
	return 0;
#endif
}
78
79 Path::Path()
80 {
81 if ( !working_directory.size() )
82 InitWorkingDirectory();
83 string s ( working_directory );
84 const char* p = strtok ( &s[0], "/\\" );
85 while ( p )
86 {
87 if ( *p )
88 path.push_back ( p );
89 p = strtok ( NULL, "/\\" );
90 }
91 }
92
93 Path::Path ( const Path& cwd, const string& file )
94 {
95 string s ( cwd.Fixup ( file, false ) );
96 const char* p = strtok ( &s[0], "/\\" );
97 while ( p )
98 {
99 if ( *p )
100 path.push_back ( p );
101 p = strtok ( NULL, "/\\" );
102 }
103 }
104
105 string
106 Path::Fixup ( const string& file, bool include_filename ) const
107 {
108 if ( strchr ( "/\\", file[0] )
109 #ifdef WIN32
110 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
111 || file[1] == ':'
112 #endif//WIN32
113 )
114 {
115 return file;
116 }
117 vector<string> pathtmp ( path );
118 string tmp ( file );
119 const char* prev = strtok ( &tmp[0], "/\\" );
120 const char* p = strtok ( NULL, "/\\" );
121 while ( p )
122 {
123 if ( !strcmp ( prev, "." ) )
124 ; // do nothing
125 else if ( !strcmp ( prev, ".." ) )
126 {
127 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
128 #ifdef WIN32
129 if ( pathtmp.size() > 1 )
130 #else
131 if ( pathtmp.size() )
132 #endif
133 pathtmp.resize ( pathtmp.size() - 1 );
134 }
135 else
136 pathtmp.push_back ( prev );
137 prev = p;
138 p = strtok ( NULL, "/\\" );
139 }
140 if ( include_filename )
141 pathtmp.push_back ( prev );
142
143 // reuse tmp variable to return recombined path
144 tmp.resize(0);
145 for ( size_t i = 0; i < pathtmp.size(); i++ )
146 {
147 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
148 #ifdef WIN32
149 if ( i ) tmp += "/";
150 #else
151 tmp += "/";
152 #endif
153 tmp += pathtmp[i];
154 }
155 return tmp;
156 }
157
158 /*static*/ string
159 Path::RelativeFromWorkingDirectory ( const string& path )
160 {
161 vector<string> vwork, vpath, vout;
162 Path::Split ( vwork, working_directory, true );
163 Path::Split ( vpath, path, true );
164 #ifdef WIN32
165 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
166 // not possible to do relative across different drive letters
167 if ( vwork[0] != vpath[0] )
168 return path;
169 #endif
170 size_t i = 0;
171 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
172 ++i;
173 if ( i < vwork.size() )
174 {
175 // path goes above our working directory, we will need some ..'s
176 for ( size_t j = 0; j < i; j++ )
177 vout.push_back ( ".." );
178 }
179 while ( i < vpath.size() )
180 vout.push_back ( vpath[i++] );
181
182 // now merge vout into a string again
183 string out;
184 for ( i = 0; i < vout.size(); i++ )
185 {
186 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
187 #ifdef WIN32
188 if ( i ) out += "/";
189 #else
190 out += "/";
191 #endif
192 out += vout[i];
193 }
194 return out;
195 }
196
197 /*static*/ void
198 Path::Split ( vector<string>& out,
199 const string& path,
200 bool include_last )
201 {
202 string s ( path );
203 const char* prev = strtok ( &s[0], "/\\" );
204 const char* p = strtok ( NULL, "/\\" );
205 out.resize ( 0 );
206 while ( p )
207 {
208 out.push_back ( prev );
209 prev = p;
210 p = strtok ( NULL, "/\\" );
211 }
212 if ( include_last )
213 out.push_back ( prev );
214 }
215
216 XMLFile::XMLFile()
217 {
218 }
219
220 void
221 XMLFile::close()
222 {
223 _buf.resize(0);
224 _p = _end = NULL;
225 }
226
227 bool
228 XMLFile::open(const string& filename_)
229 {
230 close();
231 FILE* f = fopen ( filename_.c_str(), "rb" );
232 if ( !f )
233 return false;
234 unsigned long len = (unsigned long)filelen(f);
235 _buf.resize ( len );
236 fread ( &_buf[0], 1, len, f );
237 fclose ( f );
238 _p = _buf.c_str();
239 _end = _p + len;
240 _filename = filename_;
241 next_token();
242 return true;
243 }
244
245 // next_token() moves the pointer to next token, which may be
246 // an xml element or a text element, basically it's a glorified
247 // skipspace, normally the user of this class won't need to call
248 // this function
249 void
250 XMLFile::next_token()
251 {
252 _p += strspn ( _p, WS );
253 }
254
255 bool
256 XMLFile::next_is_text()
257 {
258 return *_p != '<';
259 }
260
261 bool
262 XMLFile::more_tokens()
263 {
264 return _p != _end;
265 }
266
267 // get_token() is used to return a token, and move the pointer
268 // past the token
269 bool
270 XMLFile::get_token(string& token)
271 {
272 const char* tokend;
273 if ( !strncmp ( _p, "<!--", 4 ) )
274 {
275 tokend = strstr ( _p, "-->" );
276 if ( !tokend )
277 tokend = _end;
278 else
279 tokend += 3;
280 }
281 else if ( !strncmp ( _p, "<?", 2 ) )
282 {
283 tokend = strstr ( _p, "?>" );
284 if ( !tokend )
285 tokend = _end;
286 else
287 tokend += 2;
288 }
289 else if ( *_p == '<' )
290 {
291 tokend = strchr ( _p, '>' );
292 if ( !tokend )
293 tokend = _end;
294 else
295 ++tokend;
296 }
297 else
298 {
299 tokend = strchr ( _p, '<' );
300 if ( !tokend )
301 tokend = _end;
302 while ( tokend > _p && isspace(tokend[-1]) )
303 --tokend;
304 }
305 if ( tokend == _p )
306 return false;
307 token = string ( _p, tokend-_p );
308 _p = tokend;
309 next_token();
310 return true;
311 }
312
313 string
314 XMLFile::Location() const
315 {
316 int line = 1;
317 const char* p = strchr ( _buf.c_str(), '\n' );
318 while ( p && p < _p )
319 {
320 ++line;
321 p = strchr ( p+1, '\n' );
322 }
323 return ssprintf ( "%s(%i)",_filename.c_str(), line );
324 }
325
326 XMLAttribute::XMLAttribute()
327 {
328 }
329
330 XMLAttribute::XMLAttribute(const string& name_,
331 const string& value_)
332 : name(name_), value(value_)
333 {
334 }
335
336 XMLAttribute::XMLAttribute ( const XMLAttribute& src )
337 : name(src.name), value(src.value)
338 {
339
340 }
341
342 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
343 {
344 name = src.name;
345 value = src.value;
346 return *this;
347 }
348
349 XMLElement::XMLElement ( const string& location_ )
350 : location(location_),
351 parentElement(NULL)
352 {
353 }
354
355 XMLElement::~XMLElement()
356 {
357 size_t i;
358 for ( i = 0; i < attributes.size(); i++ )
359 delete attributes[i];
360 for ( i = 0; i < subElements.size(); i++ )
361 delete subElements[i];
362 }
363
364 void
365 XMLElement::AddSubElement ( XMLElement* e )
366 {
367 subElements.push_back ( e );
368 e->parentElement = this;
369 }
370
371 // Parse()
372 // This function takes a single xml tag ( i.e. beginning with '<' and
373 // ending with '>', and parses out it's tag name and constituent
374 // attributes.
375 // Return Value: returns true if you need to look for a </tag> for
376 // the one it just parsed...
377 bool
378 XMLElement::Parse(const string& token,
379 bool& end_tag)
380 {
381 const char* p = token.c_str();
382 assert ( *p == '<' );
383 ++p;
384 p += strspn ( p, WS );
385
386 // check if this is a comment
387 if ( !strncmp ( p, "!--", 3 ) )
388 {
389 name = "!--";
390 end_tag = false;
391 return false; // never look for end tag to a comment
392 }
393
394 end_tag = ( *p == '/' );
395 if ( end_tag )
396 {
397 ++p;
398 p += strspn ( p, WS );
399 }
400 const char* end = strpbrk ( p, WS );
401 if ( !end )
402 {
403 end = strpbrk ( p, "/>" );
404 assert ( end );
405 }
406 name = string ( p, end-p );
407 p = end;
408 p += strspn ( p, WS );
409 while ( *p != '>' && *p != '/' )
410 {
411 end = strpbrk ( p, WSEQ );
412 if ( !end )
413 {
414 end = strpbrk ( p, "/>" );
415 assert ( end );
416 }
417 string attribute ( p, end-p ), value;
418 p = end;
419 p += strspn ( p, WS );
420 if ( *p == '=' )
421 {
422 ++p;
423 p += strspn ( p, WS );
424 char quote = 0;
425 if ( strchr ( "\"'", *p ) )
426 {
427 quote = *p++;
428 end = strchr ( p, quote );
429 }
430 else
431 {
432 end = strpbrk ( p, WS );
433 }
434 if ( !end )
435 {
436 end = strchr ( p, '>' );
437 assert(end);
438 if ( end[-1] == '/' )
439 end--;
440 }
441 value = string ( p, end-p );
442 p = end;
443 if ( quote && *p == quote )
444 p++;
445 p += strspn ( p, WS );
446 }
447 else if ( name[0] != '!' )
448 {
449 throw XMLSyntaxErrorException ( location,
450 "attributes must have values" );
451 }
452 attributes.push_back ( new XMLAttribute ( attribute, value ) );
453 }
454 return !( *p == '/' ) && !end_tag;
455 }
456
457 XMLAttribute*
458 XMLElement::GetAttribute ( const string& attribute,
459 bool required )
460 {
461 // this would be faster with a tree-based container, but our attribute
462 // lists are likely to stay so short as to not be an issue.
463 for ( size_t i = 0; i < attributes.size(); i++ )
464 {
465 if ( attribute == attributes[i]->name )
466 return attributes[i];
467 }
468 if ( required )
469 {
470 throw RequiredAttributeNotFoundException ( location,
471 attribute,
472 name );
473 }
474 return NULL;
475 }
476
477 const XMLAttribute*
478 XMLElement::GetAttribute ( const string& attribute,
479 bool required ) const
480 {
481 // this would be faster with a tree-based container, but our attribute
482 // lists are likely to stay so short as to not be an issue.
483 for ( size_t i = 0; i < attributes.size(); i++ )
484 {
485 if ( attribute == attributes[i]->name )
486 return attributes[i];
487 }
488 if ( required )
489 {
490 throw RequiredAttributeNotFoundException ( location,
491 attribute,
492 name );
493 }
494 return NULL;
495 }
496
497 // XMLParse()
498 // This function reads a "token" from the file loaded in XMLFile
499 // if it finds a tag that is non-singular, it parses sub-elements and/or
500 // inner text into the XMLElement that it is building to return.
501 // Return Value: an XMLElement allocated via the new operator that contains
502 // it's parsed data. Keep calling this function until it returns NULL
503 // (no more data)
504 XMLElement*
505 XMLParse(XMLFile& f,
506 XMLIncludes* includes,
507 const Path& path,
508 bool* pend_tag = NULL )
509 {
510 string token;
511 if ( !f.get_token(token) )
512 return NULL;
513 bool end_tag, is_include = false;
514
515 while ( token[0] != '<'
516 || !strncmp ( token.c_str(), "<!--", 4 )
517 || !strncmp ( token.c_str(), "<?", 2 ) )
518 {
519 if ( token[0] != '<' )
520 throw XMLSyntaxErrorException ( f.Location(),
521 "expecting xml tag, not '%s'",
522 token.c_str() );
523 if ( !f.get_token(token) )
524 return NULL;
525 }
526
527 XMLElement* e = new XMLElement ( f.Location() );
528 bool bNeedEnd = e->Parse ( token, end_tag );
529
530 if ( e->name == "xi:include" && includes )
531 {
532 includes->push_back ( new XMLInclude ( e, path ) );
533 is_include = true;
534 }
535
536 if ( !bNeedEnd )
537 {
538 if ( pend_tag )
539 *pend_tag = end_tag;
540 else if ( end_tag )
541 {
542 delete e;
543 throw XMLSyntaxErrorException ( f.Location(),
544 "end tag '%s' not expected",
545 token.c_str() );
546 return NULL;
547 }
548 return e;
549 }
550 bool bThisMixingErrorReported = false;
551 while ( f.more_tokens() )
552 {
553 if ( f.next_is_text() )
554 {
555 if ( !f.get_token ( token ) || !token.size() )
556 {
557 throw InvalidBuildFileException (
558 f.Location(),
559 "internal tool error - get_token() failed when more_tokens() returned true" );
560 break;
561 }
562 if ( e->subElements.size() && !bThisMixingErrorReported )
563 {
564 throw XMLSyntaxErrorException ( f.Location(),
565 "mixing of inner text with sub elements" );
566 bThisMixingErrorReported = true;
567 }
568 if ( strchr ( token.c_str(), '>' ) )
569 {
570 throw XMLSyntaxErrorException ( f.Location(),
571 "invalid symbol '>'" );
572 }
573 if ( e->value.size() )
574 {
575 throw XMLSyntaxErrorException ( f.Location(),
576 "multiple instances of inner text" );
577 e->value += " " + token;
578 }
579 else
580 e->value = token;
581 }
582 else
583 {
584 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
585 if ( !e2 )
586 {
587 throw InvalidBuildFileException (
588 e->location,
589 "end of file found looking for end tag" );
590 break;
591 }
592 if ( end_tag )
593 {
594 if ( e->name != e2->name )
595 {
596 delete e2;
597 throw XMLSyntaxErrorException ( f.Location(),
598 "end tag name mismatch" );
599 break;
600 }
601 delete e2;
602 break;
603 }
604 if ( e->value.size() && !bThisMixingErrorReported )
605 {
606 throw XMLSyntaxErrorException ( f.Location(),
607 "mixing of inner text with sub elements" );
608 bThisMixingErrorReported = true;
609 }
610 e->AddSubElement ( e2 );
611 }
612 }
613 return e;
614 }
615
616 void
617 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
618 {
619 for ( ;; )
620 {
621 XMLElement* e = XMLParse ( f, &includes, path );
622 if ( !e )
623 return;
624 head.AddSubElement ( e );
625 }
626 }
627
628 XMLElement*
629 XMLLoadInclude ( XMLElement* e, const Path& path, XMLIncludes& includes )
630 {
631 XMLAttribute* att;
632 att = e->GetAttribute("href",true);
633 assert(att);
634
635 string file ( path.Fixup(att->value,true) );
636 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
637 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
638 XMLFile fInc;
639 if ( !fInc.open ( file ) )
640 {
641 // look for xi:fallback element
642 for ( size_t i = 0; i < e->subElements.size(); i++ )
643 {
644 XMLElement* e2 = e->subElements[i];
645 if ( e2->name == "xi:fallback" )
646 {
647 // now look for xi:include below...
648 for ( i = 0; i < e2->subElements.size(); i++ )
649 {
650 XMLElement* e3 = e2->subElements[i];
651 if ( e3->name == "xi:include" )
652 {
653 return XMLLoadInclude ( e3, path, includes );
654 }
655 }
656 throw InvalidBuildFileException (
657 e2->location,
658 "<xi:fallback> must have a <xi:include> sub-element" );
659 return NULL;
660 }
661 }
662 return NULL;
663 }
664 else
665 {
666 XMLElement* new_e = new XMLElement ( e->location );
667 new_e->name = "xi:included";
668 Path path2 ( path, att->value );
669 XMLReadFile ( fInc, *new_e, includes, path2 );
670 return new_e;
671 }
672 }
673
674 XMLElement*
675 XMLLoadFile ( const string& filename, const Path& path )
676 {
677 XMLIncludes includes;
678 XMLFile f;
679
680 if ( !f.open ( filename ) )
681 throw FileNotFoundException ( filename );
682
683 XMLElement* head = new XMLElement("(virtual)");
684
685 XMLReadFile ( f, *head, includes, path );
686
687 for ( size_t i = 0; i < includes.size(); i++ )
688 {
689 XMLElement* e = includes[i]->e;
690 XMLElement* e2 = XMLLoadInclude ( includes[i]->e, includes[i]->path, includes );
691 if ( !e2 )
692 {
693 throw FileNotFoundException (
694 ssprintf("%s (referenced from %s)",
695 e->GetAttribute("top_href",true)->value.c_str(),
696 f.Location().c_str() ) );
697 }
698 XMLElement* parent = e->parentElement;
699 XMLElement** parent_container = NULL;
700 if ( !parent )
701 {
702 delete e;
703 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
704 return NULL;
705 }
706 for ( size_t j = 0; j < parent->subElements.size(); j++ )
707 {
708 if ( parent->subElements[j] == e )
709 {
710 parent_container = &parent->subElements[j];
711 break;
712 }
713 }
714 if ( !parent_container )
715 {
716 delete e;
717 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
718 return NULL;
719 }
720 // replace inclusion tree with the imported tree
721 e2->parentElement = e->parentElement;
722 e2->name = e->name;
723 e2->attributes = e->attributes;
724 *parent_container = e2;
725 e->attributes.resize(0);
726 delete e;
727 }
728 return head;
729 }