f4cec1e401347b961bdc92e7c784ae6e1888e751
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #include "pch.h"
4
5 #ifdef _MSC_VER
6 #define MAX_PATH _MAX_PATH
7 #endif
8
9 #ifdef WIN32
10 # include <direct.h>
11 # include <io.h>
12 #else
13 # include <sys/stat.h>
14 # define MAX_PATH PATH_MAX
15 #endif
16 #include <assert.h>
17
18 #include "XML.h"
19 #include "exception.h"
20 #include "ssprintf.h"
21
22 using std::string;
23 using std::vector;
24
25 #ifdef WIN32
26 #define getcwd _getcwd
27 #endif//WIN32
28
// characters treated as whitespace by the tokenizer and tag parser
static const char* WS   = " \t\r\n";
// same set plus '=', used to find the end of an attribute name
static const char* WSEQ = " =\t\r\n";

// current directory used for path calculations;
// filled in by InitWorkingDirectory()
string working_directory;
33
34 class XMLInclude
35 {
36 public:
37 XMLElement *e;
38 Path path;
39
40 XMLInclude ( XMLElement* e_, const Path& path_ )
41 : e(e_), path(path_)
42 {
43 }
44 };
45
46 class XMLIncludes : public vector<XMLInclude*>
47 {
48 public:
49 ~XMLIncludes()
50 {
51 for ( size_t i = 0; i < this->size(); i++ )
52 delete (*this)[i];
53 }
54 };
55
56 void
57 InitWorkingDirectory()
58 {
59 // store the current directory for path calculations
60 working_directory.resize ( _MAX_PATH );
61 working_directory[0] = 0;
62 getcwd ( &working_directory[0], working_directory.size() );
63 working_directory.resize ( strlen ( working_directory.c_str() ) );
64 }
65
// filelen()
// Returns the length in bytes of the open file 'f', or 0 if the length
// cannot be determined.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	// _filelengthi64() signals failure with -1; report that as 0 like the
	// non-win32 branch does, instead of letting it wrap to a huge
	// unsigned value
	const __int64 len = _filelengthi64 ( _fileno(f) );
	if ( len < 0 )
		return 0;
	return len;
#else
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#endif
}
82
83 Path::Path()
84 {
85 if ( !working_directory.size() )
86 InitWorkingDirectory();
87 string s ( working_directory );
88 const char* p = strtok ( &s[0], "/\\" );
89 while ( p )
90 {
91 if ( *p )
92 path.push_back ( p );
93 p = strtok ( NULL, "/\\" );
94 }
95 }
96
97 Path::Path ( const Path& cwd, const string& file )
98 {
99 string s ( cwd.Fixup ( file, false ) );
100 const char* p = strtok ( &s[0], "/\\" );
101 while ( p )
102 {
103 if ( *p )
104 path.push_back ( p );
105 p = strtok ( NULL, "/\\" );
106 }
107 }
108
109 string
110 Path::Fixup ( const string& file, bool include_filename ) const
111 {
112 if ( strchr ( "/\\", file[0] )
113 #ifdef WIN32
114 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
115 || file[1] == ':'
116 #endif//WIN32
117 )
118 {
119 return file;
120 }
121 vector<string> pathtmp ( path );
122 string tmp ( file );
123 const char* prev = strtok ( &tmp[0], "/\\" );
124 const char* p = strtok ( NULL, "/\\" );
125 while ( p )
126 {
127 if ( !strcmp ( prev, "." ) )
128 ; // do nothing
129 else if ( !strcmp ( prev, ".." ) )
130 {
131 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
132 #ifdef WIN32
133 if ( pathtmp.size() > 1 )
134 #else
135 if ( pathtmp.size() )
136 #endif
137 pathtmp.resize ( pathtmp.size() - 1 );
138 }
139 else
140 pathtmp.push_back ( prev );
141 prev = p;
142 p = strtok ( NULL, "/\\" );
143 }
144 if ( include_filename )
145 pathtmp.push_back ( prev );
146
147 // reuse tmp variable to return recombined path
148 tmp.resize(0);
149 for ( size_t i = 0; i < pathtmp.size(); i++ )
150 {
151 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
152 #ifdef WIN32
153 if ( i ) tmp += "/";
154 #else
155 tmp += "/";
156 #endif
157 tmp += pathtmp[i];
158 }
159 return tmp;
160 }
161
162 /*static*/ string
163 Path::RelativeFromWorkingDirectory ( const string& path )
164 {
165 vector<string> vwork, vpath, vout;
166 Path::Split ( vwork, working_directory, true );
167 Path::Split ( vpath, path, true );
168 #ifdef WIN32
169 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
170 // not possible to do relative across different drive letters
171 if ( vwork[0] != vpath[0] )
172 return path;
173 #endif
174 size_t i = 0;
175 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
176 ++i;
177 if ( i < vwork.size() )
178 {
179 // path goes above our working directory, we will need some ..'s
180 for ( size_t j = 0; j < i; j++ )
181 vout.push_back ( ".." );
182 }
183 while ( i < vpath.size() )
184 vout.push_back ( vpath[i++] );
185
186 // now merge vout into a string again
187 string out = ".";
188 for ( i = 0; i < vout.size(); i++ )
189 {
190 out += "/" + vout[i];
191 }
192 return out;
193 }
194
195 /*static*/ void
196 Path::Split ( vector<string>& out,
197 const string& path,
198 bool include_last )
199 {
200 string s ( path );
201 const char* prev = strtok ( &s[0], "/\\" );
202 const char* p = strtok ( NULL, "/\\" );
203 out.resize ( 0 );
204 while ( p )
205 {
206 out.push_back ( prev );
207 prev = p;
208 p = strtok ( NULL, "/\\" );
209 }
210 if ( include_last )
211 out.push_back ( prev );
212 }
213
214 XMLFile::XMLFile()
215 {
216 }
217
218 void
219 XMLFile::close()
220 {
221 _buf.resize(0);
222 _p = _end = NULL;
223 }
224
225 bool
226 XMLFile::open(const string& filename_)
227 {
228 close();
229 FILE* f = fopen ( filename_.c_str(), "rb" );
230 if ( !f )
231 return false;
232 unsigned long len = (unsigned long)filelen(f);
233 _buf.resize ( len );
234 fread ( &_buf[0], 1, len, f );
235 fclose ( f );
236 _p = _buf.c_str();
237 _end = _p + len;
238 _filename = filename_;
239 next_token();
240 return true;
241 }
242
243 // next_token() moves the pointer to next token, which may be
244 // an xml element or a text element, basically it's a glorified
245 // skipspace, normally the user of this class won't need to call
246 // this function
247 void
248 XMLFile::next_token()
249 {
250 _p += strspn ( _p, WS );
251 }
252
253 bool
254 XMLFile::next_is_text()
255 {
256 return *_p != '<';
257 }
258
259 bool
260 XMLFile::more_tokens()
261 {
262 return _p != _end;
263 }
264
265 // get_token() is used to return a token, and move the pointer
266 // past the token
267 bool
268 XMLFile::get_token(string& token)
269 {
270 const char* tokend;
271 if ( !strncmp ( _p, "<!--", 4 ) )
272 {
273 tokend = strstr ( _p, "-->" );
274 if ( !tokend )
275 tokend = _end;
276 else
277 tokend += 3;
278 }
279 else if ( !strncmp ( _p, "<?", 2 ) )
280 {
281 tokend = strstr ( _p, "?>" );
282 if ( !tokend )
283 tokend = _end;
284 else
285 tokend += 2;
286 }
287 else if ( *_p == '<' )
288 {
289 tokend = strchr ( _p, '>' );
290 if ( !tokend )
291 tokend = _end;
292 else
293 ++tokend;
294 }
295 else
296 {
297 tokend = strchr ( _p, '<' );
298 if ( !tokend )
299 tokend = _end;
300 while ( tokend > _p && isspace(tokend[-1]) )
301 --tokend;
302 }
303 if ( tokend == _p )
304 return false;
305 token = string ( _p, tokend-_p );
306 _p = tokend;
307 next_token();
308 return true;
309 }
310
311 string
312 XMLFile::Location() const
313 {
314 int line = 1;
315 const char* p = strchr ( _buf.c_str(), '\n' );
316 while ( p && p < _p )
317 {
318 ++line;
319 p = strchr ( p+1, '\n' );
320 }
321 return ssprintf ( "%s(%i)",_filename.c_str(), line );
322 }
323
324 XMLAttribute::XMLAttribute()
325 {
326 }
327
328 XMLAttribute::XMLAttribute(const string& name_,
329 const string& value_)
330 : name(name_), value(value_)
331 {
332 }
333
334 XMLAttribute::XMLAttribute ( const XMLAttribute& src )
335 : name(src.name), value(src.value)
336 {
337
338 }
339
340 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
341 {
342 name = src.name;
343 value = src.value;
344 return *this;
345 }
346
347 XMLElement::XMLElement ( const string& location_ )
348 : location(location_),
349 parentElement(NULL)
350 {
351 }
352
353 XMLElement::~XMLElement()
354 {
355 size_t i;
356 for ( i = 0; i < attributes.size(); i++ )
357 delete attributes[i];
358 for ( i = 0; i < subElements.size(); i++ )
359 delete subElements[i];
360 }
361
362 void
363 XMLElement::AddSubElement ( XMLElement* e )
364 {
365 subElements.push_back ( e );
366 e->parentElement = this;
367 }
368
369 // Parse()
370 // This function takes a single xml tag ( i.e. beginning with '<' and
371 // ending with '>', and parses out it's tag name and constituent
372 // attributes.
373 // Return Value: returns true if you need to look for a </tag> for
374 // the one it just parsed...
375 bool
376 XMLElement::Parse(const string& token,
377 bool& end_tag)
378 {
379 const char* p = token.c_str();
380 assert ( *p == '<' );
381 ++p;
382 p += strspn ( p, WS );
383
384 // check if this is a comment
385 if ( !strncmp ( p, "!--", 3 ) )
386 {
387 name = "!--";
388 end_tag = false;
389 return false; // never look for end tag to a comment
390 }
391
392 end_tag = ( *p == '/' );
393 if ( end_tag )
394 {
395 ++p;
396 p += strspn ( p, WS );
397 }
398 const char* end = strpbrk ( p, WS );
399 if ( !end )
400 {
401 end = strpbrk ( p, "/>" );
402 assert ( end );
403 }
404 name = string ( p, end-p );
405 p = end;
406 p += strspn ( p, WS );
407 while ( *p != '>' && *p != '/' )
408 {
409 end = strpbrk ( p, WSEQ );
410 if ( !end )
411 {
412 end = strpbrk ( p, "/>" );
413 assert ( end );
414 }
415 string attribute ( p, end-p ), value;
416 p = end;
417 p += strspn ( p, WS );
418 if ( *p == '=' )
419 {
420 ++p;
421 p += strspn ( p, WS );
422 char quote = 0;
423 if ( strchr ( "\"'", *p ) )
424 {
425 quote = *p++;
426 end = strchr ( p, quote );
427 }
428 else
429 {
430 end = strpbrk ( p, WS );
431 }
432 if ( !end )
433 {
434 end = strchr ( p, '>' );
435 assert(end);
436 if ( end[-1] == '/' )
437 end--;
438 }
439 value = string ( p, end-p );
440 p = end;
441 if ( quote && *p == quote )
442 p++;
443 p += strspn ( p, WS );
444 }
445 else if ( name[0] != '!' )
446 {
447 throw XMLSyntaxErrorException ( location,
448 "attributes must have values" );
449 }
450 attributes.push_back ( new XMLAttribute ( attribute, value ) );
451 }
452 return !( *p == '/' ) && !end_tag;
453 }
454
455 XMLAttribute*
456 XMLElement::GetAttribute ( const string& attribute,
457 bool required )
458 {
459 // this would be faster with a tree-based container, but our attribute
460 // lists are likely to stay so short as to not be an issue.
461 for ( size_t i = 0; i < attributes.size(); i++ )
462 {
463 if ( attribute == attributes[i]->name )
464 return attributes[i];
465 }
466 if ( required )
467 {
468 throw RequiredAttributeNotFoundException ( location,
469 attribute,
470 name );
471 }
472 return NULL;
473 }
474
475 const XMLAttribute*
476 XMLElement::GetAttribute ( const string& attribute,
477 bool required ) const
478 {
479 // this would be faster with a tree-based container, but our attribute
480 // lists are likely to stay so short as to not be an issue.
481 for ( size_t i = 0; i < attributes.size(); i++ )
482 {
483 if ( attribute == attributes[i]->name )
484 return attributes[i];
485 }
486 if ( required )
487 {
488 throw RequiredAttributeNotFoundException ( location,
489 attribute,
490 name );
491 }
492 return NULL;
493 }
494
495 // XMLParse()
496 // This function reads a "token" from the file loaded in XMLFile
497 // if it finds a tag that is non-singular, it parses sub-elements and/or
498 // inner text into the XMLElement that it is building to return.
499 // Return Value: an XMLElement allocated via the new operator that contains
500 // it's parsed data. Keep calling this function until it returns NULL
501 // (no more data)
502 XMLElement*
503 XMLParse(XMLFile& f,
504 XMLIncludes* includes,
505 const Path& path,
506 bool* pend_tag = NULL )
507 {
508 string token;
509 if ( !f.get_token(token) )
510 return NULL;
511 bool end_tag, is_include = false;
512
513 while ( token[0] != '<'
514 || !strncmp ( token.c_str(), "<!--", 4 )
515 || !strncmp ( token.c_str(), "<?", 2 ) )
516 {
517 if ( token[0] != '<' )
518 throw XMLSyntaxErrorException ( f.Location(),
519 "expecting xml tag, not '%s'",
520 token.c_str() );
521 if ( !f.get_token(token) )
522 return NULL;
523 }
524
525 XMLElement* e = new XMLElement ( f.Location() );
526 bool bNeedEnd = e->Parse ( token, end_tag );
527
528 if ( e->name == "xi:include" && includes )
529 {
530 includes->push_back ( new XMLInclude ( e, path ) );
531 is_include = true;
532 }
533
534 if ( !bNeedEnd )
535 {
536 if ( pend_tag )
537 *pend_tag = end_tag;
538 else if ( end_tag )
539 {
540 delete e;
541 throw XMLSyntaxErrorException ( f.Location(),
542 "end tag '%s' not expected",
543 token.c_str() );
544 return NULL;
545 }
546 return e;
547 }
548 bool bThisMixingErrorReported = false;
549 while ( f.more_tokens() )
550 {
551 if ( f.next_is_text() )
552 {
553 if ( !f.get_token ( token ) || !token.size() )
554 {
555 throw InvalidBuildFileException (
556 f.Location(),
557 "internal tool error - get_token() failed when more_tokens() returned true" );
558 break;
559 }
560 if ( e->subElements.size() && !bThisMixingErrorReported )
561 {
562 throw XMLSyntaxErrorException ( f.Location(),
563 "mixing of inner text with sub elements" );
564 bThisMixingErrorReported = true;
565 }
566 if ( strchr ( token.c_str(), '>' ) )
567 {
568 throw XMLSyntaxErrorException ( f.Location(),
569 "invalid symbol '>'" );
570 }
571 if ( e->value.size() )
572 {
573 throw XMLSyntaxErrorException ( f.Location(),
574 "multiple instances of inner text" );
575 e->value += " " + token;
576 }
577 else
578 e->value = token;
579 }
580 else
581 {
582 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
583 if ( !e2 )
584 {
585 throw InvalidBuildFileException (
586 e->location,
587 "end of file found looking for end tag" );
588 break;
589 }
590 if ( end_tag )
591 {
592 if ( e->name != e2->name )
593 {
594 delete e2;
595 throw XMLSyntaxErrorException ( f.Location(),
596 "end tag name mismatch" );
597 break;
598 }
599 delete e2;
600 break;
601 }
602 if ( e->value.size() && !bThisMixingErrorReported )
603 {
604 throw XMLSyntaxErrorException ( f.Location(),
605 "mixing of inner text with sub elements" );
606 bThisMixingErrorReported = true;
607 }
608 e->AddSubElement ( e2 );
609 }
610 }
611 return e;
612 }
613
614 void
615 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
616 {
617 for ( ;; )
618 {
619 XMLElement* e = XMLParse ( f, &includes, path );
620 if ( !e )
621 return;
622 head.AddSubElement ( e );
623 }
624 }
625
626 XMLElement*
627 XMLLoadInclude ( XMLElement* e, const Path& path, XMLIncludes& includes )
628 {
629 XMLAttribute* att;
630 att = e->GetAttribute("href",true);
631 assert(att);
632
633 string file ( path.Fixup(att->value,true) );
634 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
635 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
636 XMLFile fInc;
637 if ( !fInc.open ( file ) )
638 {
639 // look for xi:fallback element
640 for ( size_t i = 0; i < e->subElements.size(); i++ )
641 {
642 XMLElement* e2 = e->subElements[i];
643 if ( e2->name == "xi:fallback" )
644 {
645 // now look for xi:include below...
646 for ( i = 0; i < e2->subElements.size(); i++ )
647 {
648 XMLElement* e3 = e2->subElements[i];
649 if ( e3->name == "xi:include" )
650 {
651 return XMLLoadInclude ( e3, path, includes );
652 }
653 }
654 throw InvalidBuildFileException (
655 e2->location,
656 "<xi:fallback> must have a <xi:include> sub-element" );
657 return NULL;
658 }
659 }
660 return NULL;
661 }
662 else
663 {
664 XMLElement* new_e = new XMLElement ( e->location );
665 new_e->name = "xi:included";
666 Path path2 ( path, att->value );
667 XMLReadFile ( fInc, *new_e, includes, path2 );
668 return new_e;
669 }
670 }
671
672 XMLElement*
673 XMLLoadFile ( const string& filename, const Path& path )
674 {
675 XMLIncludes includes;
676 XMLFile f;
677
678 if ( !f.open ( filename ) )
679 throw FileNotFoundException ( filename );
680
681 XMLElement* head = new XMLElement("(virtual)");
682
683 XMLReadFile ( f, *head, includes, path );
684
685 for ( size_t i = 0; i < includes.size(); i++ )
686 {
687 XMLElement* e = includes[i]->e;
688 XMLElement* e2 = XMLLoadInclude ( includes[i]->e, includes[i]->path, includes );
689 if ( !e2 )
690 {
691 throw FileNotFoundException (
692 ssprintf("%s (referenced from %s)",
693 e->GetAttribute("top_href",true)->value.c_str(),
694 f.Location().c_str() ) );
695 }
696 XMLElement* parent = e->parentElement;
697 XMLElement** parent_container = NULL;
698 if ( !parent )
699 {
700 delete e;
701 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
702 return NULL;
703 }
704 for ( size_t j = 0; j < parent->subElements.size(); j++ )
705 {
706 if ( parent->subElements[j] == e )
707 {
708 parent_container = &parent->subElements[j];
709 break;
710 }
711 }
712 if ( !parent_container )
713 {
714 delete e;
715 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
716 return NULL;
717 }
718 // replace inclusion tree with the imported tree
719 e2->parentElement = e->parentElement;
720 e2->name = e->name;
721 e2->attributes = e->attributes;
722 *parent_container = e2;
723 e->attributes.resize(0);
724 delete e;
725 }
726 return head;
727 }