make sure parentElement is set properly for <xi:include> elements
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #include "pch.h"
4
5 #include <direct.h>
6 #include <io.h>
7 #include <assert.h>
8
9 #include "XML.h"
10 #include "exception.h"
11 #include "ssprintf.h"
12
13 using std::string;
14 using std::vector;
15
16 #ifdef WIN32
17 #define getcwd _getcwd
18 #endif//WIN32
19
20 static const char* WS = " \t\r\n";
21 static const char* WSEQ = " =\t\r\n";
22
23 string working_directory;
24
25 class XMLInclude
26 {
27 public:
28 XMLElement *e;
29 Path path;
30
31 XMLInclude ( XMLElement* e_, const Path& path_ )
32 : e(e_), path(path_)
33 {
34 }
35 };
36
37 class XMLIncludes : public vector<XMLInclude*>
38 {
39 public:
40 ~XMLIncludes()
41 {
42 for ( size_t i = 0; i < this->size(); i++ )
43 delete (*this)[i];
44 }
45 };
46
47 void
48 InitWorkingDirectory()
49 {
50 // store the current directory for path calculations
51 working_directory.resize ( _MAX_PATH );
52 working_directory[0] = 0;
53 getcwd ( &working_directory[0], working_directory.size() );
54 working_directory.resize ( strlen ( working_directory.c_str() ) );
55 }
56
// filelen()
// Returns the length, in bytes, of the file behind the stream f.
// Returns 0 when the length cannot be determined.
// The return type is a 64-bit unsigned integer, spelled differently for
// MSVC and for other compilers.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#elif defined(UNIX)
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#else
	// Neither platform macro is defined: without this branch the function
	// would fall off its end without returning a value. Measure the file
	// with the portable stdio calls instead and restore the read position.
	const long here = ftell ( f );
	if ( here < 0 || fseek ( f, 0, SEEK_END ) != 0 )
		return 0;
	const long size = ftell ( f );
	fseek ( f, here, SEEK_SET );
	if ( size < 0 )
		return 0;
	return (unsigned long)size;
#endif
}
73
74 Path::Path()
75 {
76 if ( !working_directory.size() )
77 InitWorkingDirectory();
78 string s ( working_directory );
79 const char* p = strtok ( &s[0], "/\\" );
80 while ( p )
81 {
82 if ( *p )
83 path.push_back ( p );
84 p = strtok ( NULL, "/\\" );
85 }
86 }
87
88 Path::Path ( const Path& cwd, const string& file )
89 {
90 string s ( cwd.Fixup ( file, false ) );
91 const char* p = strtok ( &s[0], "/\\" );
92 while ( p )
93 {
94 if ( *p )
95 path.push_back ( p );
96 p = strtok ( NULL, "/\\" );
97 }
98 }
99
100 string
101 Path::Fixup ( const string& file, bool include_filename ) const
102 {
103 if ( strchr ( "/\\", file[0] )
104 #ifdef WIN32
105 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
106 || file[1] == ':'
107 #endif//WIN32
108 )
109 {
110 return file;
111 }
112 vector<string> pathtmp ( path );
113 string tmp ( file );
114 const char* prev = strtok ( &tmp[0], "/\\" );
115 const char* p = strtok ( NULL, "/\\" );
116 while ( p )
117 {
118 if ( !strcmp ( prev, "." ) )
119 ; // do nothing
120 else if ( !strcmp ( prev, ".." ) )
121 {
122 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
123 #ifdef WIN32
124 if ( pathtmp.size() > 1 )
125 #else
126 if ( pathtmp.size() )
127 #endif
128 pathtmp.resize ( pathtmp.size() - 1 );
129 }
130 else
131 pathtmp.push_back ( prev );
132 prev = p;
133 p = strtok ( NULL, "/\\" );
134 }
135 if ( include_filename )
136 pathtmp.push_back ( prev );
137
138 // reuse tmp variable to return recombined path
139 tmp.resize(0);
140 for ( size_t i = 0; i < pathtmp.size(); i++ )
141 {
142 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
143 #ifdef WIN32
144 if ( i ) tmp += "/";
145 #else
146 tmp += "/";
147 #endif
148 tmp += pathtmp[i];
149 }
150 return tmp;
151 }
152
153 /*static*/ string
154 Path::RelativeFromWorkingDirectory ( const string& path )
155 {
156 vector<string> vwork, vpath, vout;
157 Path::Split ( vwork, working_directory, true );
158 Path::Split ( vpath, path, true );
159 #ifdef WIN32
160 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
161 // not possible to do relative across different drive letters
162 if ( vwork[0] != vpath[0] )
163 return path;
164 #endif
165 size_t i = 0;
166 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
167 ++i;
168 if ( i < vwork.size() )
169 {
170 // path goes above our working directory, we will need some ..'s
171 for ( size_t j = 0; j < i; j++ )
172 vout.push_back ( ".." );
173 }
174 while ( i < vpath.size() )
175 vout.push_back ( vpath[i++] );
176
177 // now merge vout into a string again
178 string out;
179 for ( i = 0; i < vout.size(); i++ )
180 {
181 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
182 #ifdef WIN32
183 if ( i ) out += "/";
184 #else
185 out += "/";
186 #endif
187 out += vout[i];
188 }
189 return out;
190 }
191
192 /*static*/ void
193 Path::Split ( vector<string>& out,
194 const string& path,
195 bool include_last )
196 {
197 string s ( path );
198 const char* prev = strtok ( &s[0], "/\\" );
199 const char* p = strtok ( NULL, "/\\" );
200 out.resize ( 0 );
201 while ( p )
202 {
203 out.push_back ( prev );
204 prev = p;
205 p = strtok ( NULL, "/\\" );
206 }
207 if ( include_last )
208 out.push_back ( prev );
209 }
210
211 XMLFile::XMLFile()
212 {
213 }
214
215 void
216 XMLFile::close()
217 {
218 _buf.resize(0);
219 _p = _end = NULL;
220 }
221
222 bool
223 XMLFile::open(const string& filename_)
224 {
225 close();
226 FILE* f = fopen ( filename_.c_str(), "rb" );
227 if ( !f )
228 return false;
229 unsigned long len = (unsigned long)filelen(f);
230 _buf.resize ( len );
231 fread ( &_buf[0], 1, len, f );
232 fclose ( f );
233 _p = _buf.c_str();
234 _end = _p + len;
235 _filename = filename_;
236 next_token();
237 return true;
238 }
239
240 // next_token() moves the pointer to next token, which may be
241 // an xml element or a text element, basically it's a glorified
242 // skipspace, normally the user of this class won't need to call
243 // this function
244 void
245 XMLFile::next_token()
246 {
247 _p += strspn ( _p, WS );
248 }
249
250 bool
251 XMLFile::next_is_text()
252 {
253 return *_p != '<';
254 }
255
256 bool
257 XMLFile::more_tokens()
258 {
259 return _p != _end;
260 }
261
262 // get_token() is used to return a token, and move the pointer
263 // past the token
264 bool
265 XMLFile::get_token(string& token)
266 {
267 const char* tokend;
268 if ( !strncmp ( _p, "<!--", 4 ) )
269 {
270 tokend = strstr ( _p, "-->" );
271 if ( !tokend )
272 tokend = _end;
273 else
274 tokend += 3;
275 }
276 else if ( !strncmp ( _p, "<?", 2 ) )
277 {
278 tokend = strstr ( _p, "?>" );
279 if ( !tokend )
280 tokend = _end;
281 else
282 tokend += 2;
283 }
284 else if ( *_p == '<' )
285 {
286 tokend = strchr ( _p, '>' );
287 if ( !tokend )
288 tokend = _end;
289 else
290 ++tokend;
291 }
292 else
293 {
294 tokend = strchr ( _p, '<' );
295 if ( !tokend )
296 tokend = _end;
297 while ( tokend > _p && isspace(tokend[-1]) )
298 --tokend;
299 }
300 if ( tokend == _p )
301 return false;
302 token = string ( _p, tokend-_p );
303 _p = tokend;
304 next_token();
305 return true;
306 }
307
308 string
309 XMLFile::Location() const
310 {
311 int line = 1;
312 const char* p = strchr ( _buf.c_str(), '\n' );
313 while ( p && p < _p )
314 {
315 ++line;
316 p = strchr ( p+1, '\n' );
317 }
318 return ssprintf ( "%s(%i)",_filename.c_str(), line );
319 }
320
321 XMLAttribute::XMLAttribute()
322 {
323 }
324
325 XMLAttribute::XMLAttribute(const string& name_,
326 const string& value_)
327 : name(name_), value(value_)
328 {
329 }
330
331 XMLAttribute::XMLAttribute ( const XMLAttribute& src )
332 : name(src.name), value(src.value)
333 {
334
335 }
336
337 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
338 {
339 name = src.name;
340 value = src.value;
341 return *this;
342 }
343
344 XMLElement::XMLElement ( const string& location_ )
345 : location(location_),
346 parentElement(NULL)
347 {
348 }
349
350 XMLElement::~XMLElement()
351 {
352 size_t i;
353 for ( i = 0; i < attributes.size(); i++ )
354 delete attributes[i];
355 for ( i = 0; i < subElements.size(); i++ )
356 delete subElements[i];
357 }
358
359 void
360 XMLElement::AddSubElement ( XMLElement* e )
361 {
362 subElements.push_back ( e );
363 e->parentElement = this;
364 }
365
366 // Parse()
367 // This function takes a single xml tag ( i.e. beginning with '<' and
368 // ending with '>', and parses out it's tag name and constituent
369 // attributes.
370 // Return Value: returns true if you need to look for a </tag> for
371 // the one it just parsed...
372 bool
373 XMLElement::Parse(const string& token,
374 bool& end_tag)
375 {
376 const char* p = token.c_str();
377 assert ( *p == '<' );
378 ++p;
379 p += strspn ( p, WS );
380
381 // check if this is a comment
382 if ( !strncmp ( p, "!--", 3 ) )
383 {
384 name = "!--";
385 end_tag = false;
386 return false; // never look for end tag to a comment
387 }
388
389 end_tag = ( *p == '/' );
390 if ( end_tag )
391 {
392 ++p;
393 p += strspn ( p, WS );
394 }
395 const char* end = strpbrk ( p, WS );
396 if ( !end )
397 {
398 end = strpbrk ( p, "/>" );
399 assert ( end );
400 }
401 name = string ( p, end-p );
402 p = end;
403 p += strspn ( p, WS );
404 while ( *p != '>' && *p != '/' )
405 {
406 end = strpbrk ( p, WSEQ );
407 if ( !end )
408 {
409 end = strpbrk ( p, "/>" );
410 assert ( end );
411 }
412 string attribute ( p, end-p ), value;
413 p = end;
414 p += strspn ( p, WS );
415 if ( *p == '=' )
416 {
417 ++p;
418 p += strspn ( p, WS );
419 char quote = 0;
420 if ( strchr ( "\"'", *p ) )
421 {
422 quote = *p++;
423 end = strchr ( p, quote );
424 }
425 else
426 {
427 end = strpbrk ( p, WS );
428 }
429 if ( !end )
430 {
431 end = strchr ( p, '>' );
432 assert(end);
433 if ( end[-1] == '/' )
434 end--;
435 }
436 value = string ( p, end-p );
437 p = end;
438 if ( quote && *p == quote )
439 p++;
440 p += strspn ( p, WS );
441 }
442 attributes.push_back ( new XMLAttribute ( attribute, value ) );
443 }
444 return !( *p == '/' ) && !end_tag;
445 }
446
447 XMLAttribute*
448 XMLElement::GetAttribute ( const string& attribute,
449 bool required )
450 {
451 // this would be faster with a tree-based container, but our attribute
452 // lists are likely to stay so short as to not be an issue.
453 for ( size_t i = 0; i < attributes.size(); i++ )
454 {
455 if ( attribute == attributes[i]->name )
456 return attributes[i];
457 }
458 if ( required )
459 {
460 throw RequiredAttributeNotFoundException ( location,
461 attribute,
462 name );
463 }
464 return NULL;
465 }
466
467 const XMLAttribute*
468 XMLElement::GetAttribute ( const string& attribute,
469 bool required ) const
470 {
471 // this would be faster with a tree-based container, but our attribute
472 // lists are likely to stay so short as to not be an issue.
473 for ( size_t i = 0; i < attributes.size(); i++ )
474 {
475 if ( attribute == attributes[i]->name )
476 return attributes[i];
477 }
478 if ( required )
479 {
480 throw RequiredAttributeNotFoundException ( location,
481 attribute,
482 name );
483 }
484 return NULL;
485 }
486
487 // XMLParse()
488 // This function reads a "token" from the file loaded in XMLFile
489 // if it finds a tag that is non-singular, it parses sub-elements and/or
490 // inner text into the XMLElement that it is building to return.
491 // Return Value: an XMLElement allocated via the new operator that contains
492 // it's parsed data. Keep calling this function until it returns NULL
493 // (no more data)
494 XMLElement*
495 XMLParse(XMLFile& f,
496 XMLIncludes* includes,
497 const Path& path,
498 bool* pend_tag = NULL )
499 {
500 string token;
501 if ( !f.get_token(token) )
502 return NULL;
503 bool end_tag, is_include = false;
504
505 while ( token[0] != '<'
506 || !strncmp ( token.c_str(), "<!--", 4 )
507 || !strncmp ( token.c_str(), "<?", 2 ) )
508 {
509 if ( token[0] != '<' )
510 throw XMLSyntaxErrorException ( f.Location(),
511 "expecting xml tag, not '%s'",
512 token.c_str() );
513 if ( !f.get_token(token) )
514 return NULL;
515 }
516
517 XMLElement* e = new XMLElement ( f.Location() );
518 bool bNeedEnd = e->Parse ( token, end_tag );
519
520 if ( e->name == "xi:include" && includes )
521 {
522 includes->push_back ( new XMLInclude ( e, path ) );
523 is_include = true;
524 }
525
526 if ( !bNeedEnd )
527 {
528 if ( pend_tag )
529 *pend_tag = end_tag;
530 else if ( end_tag )
531 {
532 delete e;
533 throw XMLSyntaxErrorException ( f.Location(),
534 "end tag '%s' not expected",
535 token.c_str() );
536 return NULL;
537 }
538 return e;
539 }
540 bool bThisMixingErrorReported = false;
541 while ( f.more_tokens() )
542 {
543 if ( f.next_is_text() )
544 {
545 if ( !f.get_token ( token ) || !token.size() )
546 {
547 throw InvalidBuildFileException (
548 f.Location(),
549 "internal tool error - get_token() failed when more_tokens() returned true" );
550 break;
551 }
552 if ( e->subElements.size() && !bThisMixingErrorReported )
553 {
554 throw XMLSyntaxErrorException ( f.Location(),
555 "mixing of inner text with sub elements" );
556 bThisMixingErrorReported = true;
557 }
558 if ( strchr ( token.c_str(), '>' ) )
559 {
560 throw XMLSyntaxErrorException ( f.Location(),
561 "invalid symbol '>'" );
562 }
563 if ( e->value.size() )
564 {
565 throw XMLSyntaxErrorException ( f.Location(),
566 "multiple instances of inner text" );
567 e->value += " " + token;
568 }
569 else
570 e->value = token;
571 }
572 else
573 {
574 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
575 if ( !e2 )
576 {
577 throw InvalidBuildFileException (
578 e->location,
579 "end of file found looking for end tag" );
580 break;
581 }
582 if ( end_tag )
583 {
584 if ( e->name != e2->name )
585 {
586 delete e2;
587 throw XMLSyntaxErrorException ( f.Location(),
588 "end tag name mismatch" );
589 break;
590 }
591 delete e2;
592 break;
593 }
594 if ( e->value.size() && !bThisMixingErrorReported )
595 {
596 throw XMLSyntaxErrorException ( f.Location(),
597 "mixing of inner text with sub elements" );
598 bThisMixingErrorReported = true;
599 }
600 e->AddSubElement ( e2 );
601 }
602 }
603 return e;
604 }
605
606 void
607 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
608 {
609 for ( ;; )
610 {
611 XMLElement* e = XMLParse ( f, &includes, path );
612 if ( !e )
613 return;
614 head.AddSubElement ( e );
615 }
616 }
617
618 XMLElement*
619 XMLLoadInclude ( XMLElement* e, const Path& path, XMLIncludes& includes )
620 {
621 // TODO FIXME
622 XMLAttribute* att;
623 att = e->GetAttribute("href",true);
624 assert(att);
625
626 string file ( path.Fixup(att->value,true) );
627 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
628 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
629 XMLFile fInc;
630 if ( !fInc.open ( file ) )
631 {
632 // look for xi:fallback element
633 for ( size_t i = 0; i < e->subElements.size(); i++ )
634 {
635 XMLElement* e2 = e->subElements[i];
636 if ( e2->name == "xi:fallback" )
637 {
638 // now look for xi:include below...
639 for ( i = 0; i < e2->subElements.size(); i++ )
640 {
641 XMLElement* e3 = e2->subElements[i];
642 if ( e3->name == "xi:include" )
643 {
644 return XMLLoadInclude ( e3, path, includes );
645 }
646 }
647 throw InvalidBuildFileException (
648 e2->location,
649 "<xi:fallback> must have a <xi:include> sub-element" );
650 return NULL;
651 }
652 }
653 return NULL;
654 }
655 else
656 {
657 XMLElement* new_e = new XMLElement ( e->location );
658 new_e->name = "xi:included";
659 Path path2 ( path, att->value );
660 XMLReadFile ( fInc, *new_e, includes, path2 );
661 return new_e;
662 }
663 }
664
665 XMLElement*
666 XMLLoadFile ( const string& filename, const Path& path )
667 {
668 XMLIncludes includes;
669 XMLFile f;
670
671 if ( !f.open ( filename ) )
672 throw FileNotFoundException ( filename );
673
674 XMLElement* head = new XMLElement("(virtual)");
675
676 XMLReadFile ( f, *head, includes, path );
677
678 for ( size_t i = 0; i < includes.size(); i++ )
679 {
680 XMLElement* e = includes[i]->e;
681 XMLElement* e2 = XMLLoadInclude ( includes[i]->e, includes[i]->path, includes );
682 if ( !e2 )
683 {
684 throw FileNotFoundException (
685 ssprintf("%s (referenced from %s)",
686 e->GetAttribute("top_href",true)->value.c_str(),
687 f.Location().c_str() ) );
688 }
689 XMLElement* parent = e->parentElement;
690 XMLElement** parent_container = NULL;
691 if ( !parent )
692 {
693 delete e;
694 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
695 return NULL;
696 }
697 for ( size_t j = 0; j < parent->subElements.size(); j++ )
698 {
699 if ( parent->subElements[j] == e )
700 {
701 parent_container = &parent->subElements[j];
702 break;
703 }
704 }
705 if ( !parent_container )
706 {
707 delete e;
708 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
709 return NULL;
710 }
711 // replace inclusion tree with the imported tree
712 e2->parentElement = e->parentElement;
713 e2->name = e->name;
714 e2->attributes = e->attributes;
715 *parent_container = e2;
716 e->attributes.resize(0);
717 delete e;
718 }
719 return head;
720 }