Document <module type="nativecui" ...>
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 /*
2 * Copyright (C) 2005 Casper S. Hornstrup
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 #include "pch.h"
19
20 #ifdef _MSC_VER
21 #define MAX_PATH _MAX_PATH
22 #endif
23
24 #ifdef WIN32
25 # include <direct.h>
26 # include <io.h>
27 #else
28 # include <sys/stat.h>
29 # define MAX_PATH PATH_MAX
30 #endif
31 #include <assert.h>
32
33 #include "XML.h"
34 #include "exception.h"
35 #include "ssprintf.h"
36
37 using std::string;
38 using std::vector;
39
40 #ifdef WIN32
41 #define getcwd _getcwd
42 #endif//WIN32
43
// characters treated as whitespace when skipping between tokens and
// between the parts of a tag
static const char* WS = " \t\r\n";
// whitespace plus '=': the characters that end an attribute name
static const char* WSEQ = " =\t\r\n";

// current directory used as the base for path calculations; filled in
// by InitWorkingDirectory()
string working_directory;
48
49 XMLIncludes::~XMLIncludes()
50 {
51 for ( size_t i = 0; i < this->size(); i++ )
52 delete (*this)[i];
53 }
54
55 void
56 InitWorkingDirectory()
57 {
58 // store the current directory for path calculations
59 working_directory.resize ( _MAX_PATH );
60 working_directory[0] = 0;
61 getcwd ( &working_directory[0], working_directory.size() );
62 working_directory.resize ( strlen ( working_directory.c_str() ) );
63 }
64
// Returns the length in bytes of the file behind the stdio stream f.
// On non-WIN32 builds a failing fstat64() is reported as a length of 0.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	const int fd = _fileno ( f );
	return _filelengthi64 ( fd );
#else
	struct stat64 info;
	const int rc = fstat64 ( fileno ( f ), &info );
	if ( rc == 0 )
		return info.st_size;
	return 0;
#endif
}
81
82 Path::Path()
83 {
84 if ( !working_directory.size() )
85 InitWorkingDirectory();
86 string s ( working_directory );
87 const char* p = strtok ( &s[0], "/\\" );
88 while ( p )
89 {
90 if ( *p )
91 path.push_back ( p );
92 p = strtok ( NULL, "/\\" );
93 }
94 }
95
96 Path::Path ( const Path& cwd, const string& file )
97 {
98 string s ( cwd.Fixup ( file, false ) );
99 const char* p = strtok ( &s[0], "/\\" );
100 while ( p )
101 {
102 if ( *p )
103 path.push_back ( p );
104 p = strtok ( NULL, "/\\" );
105 }
106 }
107
108 string
109 Path::Fixup ( const string& file, bool include_filename ) const
110 {
111 if ( strchr ( "/\\", file[0] )
112 #ifdef WIN32
113 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
114 || file[1] == ':'
115 #endif//WIN32
116 )
117 {
118 return file;
119 }
120 vector<string> pathtmp ( path );
121 string tmp ( file );
122 const char* prev = strtok ( &tmp[0], "/\\" );
123 const char* p = strtok ( NULL, "/\\" );
124 while ( p )
125 {
126 if ( !strcmp ( prev, "." ) )
127 ; // do nothing
128 else if ( !strcmp ( prev, ".." ) )
129 {
130 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
131 #ifdef WIN32
132 if ( pathtmp.size() > 1 )
133 #else
134 if ( pathtmp.size() )
135 #endif
136 pathtmp.resize ( pathtmp.size() - 1 );
137 }
138 else
139 pathtmp.push_back ( prev );
140 prev = p;
141 p = strtok ( NULL, "/\\" );
142 }
143 if ( include_filename )
144 pathtmp.push_back ( prev );
145
146 // reuse tmp variable to return recombined path
147 tmp.resize(0);
148 for ( size_t i = 0; i < pathtmp.size(); i++ )
149 {
150 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
151 #ifdef WIN32
152 if ( i ) tmp += "/";
153 #else
154 tmp += "/";
155 #endif
156 tmp += pathtmp[i];
157 }
158 return tmp;
159 }
160
161 string
162 Path::RelativeFromWorkingDirectory ()
163 {
164 string out = "";
165 for ( size_t i = 0; i < path.size(); i++ )
166 {
167 out += "/" + path[i];
168 }
169 return RelativeFromWorkingDirectory ( out );
170 }
171
172 string
173 Path::RelativeFromWorkingDirectory ( const string& path )
174 {
175 vector<string> vwork, vpath, vout;
176 Path::Split ( vwork, working_directory, true );
177 Path::Split ( vpath, path, true );
178 #ifdef WIN32
179 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
180 // not possible to do relative across different drive letters
181 if ( vwork[0] != vpath[0] )
182 return path;
183 #endif
184 size_t i = 0;
185 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
186 ++i;
187 if ( i < vwork.size() )
188 {
189 // path goes above our working directory, we will need some ..'s
190 for ( size_t j = 0; j < i; j++ )
191 vout.push_back ( ".." );
192 }
193 while ( i < vpath.size() )
194 vout.push_back ( vpath[i++] );
195
196 // now merge vout into a string again
197 string out = vout[0];
198 for ( i = 1; i < vout.size(); i++ )
199 {
200 out += "/" + vout[i];
201 }
202 return out;
203 }
204
205 void
206 Path::Split ( vector<string>& out,
207 const string& path,
208 bool include_last )
209 {
210 string s ( path );
211 const char* prev = strtok ( &s[0], "/\\" );
212 const char* p = strtok ( NULL, "/\\" );
213 out.resize ( 0 );
214 while ( p )
215 {
216 out.push_back ( prev );
217 prev = p;
218 p = strtok ( NULL, "/\\" );
219 }
220 if ( include_last )
221 out.push_back ( prev );
222 }
223
224 XMLFile::XMLFile()
225 {
226 }
227
228 void
229 XMLFile::close()
230 {
231 _buf.resize(0);
232 _p = _end = NULL;
233 }
234
235 bool
236 XMLFile::open(const string& filename_)
237 {
238 close();
239 FILE* f = fopen ( filename_.c_str(), "rb" );
240 if ( !f )
241 return false;
242 unsigned long len = (unsigned long)filelen(f);
243 _buf.resize ( len );
244 fread ( &_buf[0], 1, len, f );
245 fclose ( f );
246 _p = _buf.c_str();
247 _end = _p + len;
248 _filename = filename_;
249 next_token();
250 return true;
251 }
252
253 // next_token() moves the pointer to next token, which may be
254 // an xml element or a text element, basically it's a glorified
255 // skipspace, normally the user of this class won't need to call
256 // this function
257 void
258 XMLFile::next_token()
259 {
260 _p += strspn ( _p, WS );
261 }
262
263 bool
264 XMLFile::next_is_text()
265 {
266 return *_p != '<';
267 }
268
269 bool
270 XMLFile::more_tokens()
271 {
272 return _p != _end;
273 }
274
275 // get_token() is used to return a token, and move the pointer
276 // past the token
277 bool
278 XMLFile::get_token(string& token)
279 {
280 const char* tokend;
281 if ( !strncmp ( _p, "<!--", 4 ) )
282 {
283 tokend = strstr ( _p, "-->" );
284 if ( !tokend )
285 tokend = _end;
286 else
287 tokend += 3;
288 }
289 else if ( !strncmp ( _p, "<?", 2 ) )
290 {
291 tokend = strstr ( _p, "?>" );
292 if ( !tokend )
293 tokend = _end;
294 else
295 tokend += 2;
296 }
297 else if ( *_p == '<' )
298 {
299 tokend = strchr ( _p, '>' );
300 if ( !tokend )
301 tokend = _end;
302 else
303 ++tokend;
304 }
305 else
306 {
307 tokend = strchr ( _p, '<' );
308 if ( !tokend )
309 tokend = _end;
310 while ( tokend > _p && isspace(tokend[-1]) )
311 --tokend;
312 }
313 if ( tokend == _p )
314 return false;
315 token = string ( _p, tokend-_p );
316 _p = tokend;
317 next_token();
318 return true;
319 }
320
321 string
322 XMLFile::Location() const
323 {
324 int line = 1;
325 const char* p = strchr ( _buf.c_str(), '\n' );
326 while ( p && p < _p )
327 {
328 ++line;
329 p = strchr ( p+1, '\n' );
330 }
331 return ssprintf ( "%s(%i)",_filename.c_str(), line );
332 }
333
// Creates an attribute whose name and value are default-constructed.
XMLAttribute::XMLAttribute()
{
}
337
// Creates an attribute holding copies of the given name and value.
XMLAttribute::XMLAttribute(const string& name_,
                           const string& value_)
	: name(name_), value(value_)
{
}
343
// Copy constructor: duplicates the name and value of src.
XMLAttribute::XMLAttribute ( const XMLAttribute& src )
	: name(src.name), value(src.value)
{

}
349
350 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
351 {
352 name = src.name;
353 value = src.value;
354 return *this;
355 }
356
// Creates an element that has no parent yet.
//   xmlFile  - stored in the element's xmlFile member
//   location - stored in the element's location member; used when
//              reporting errors about this element
XMLElement::XMLElement ( XMLFile* xmlFile,
                         const string& location )
	: xmlFile ( xmlFile ),
	  location ( location ),
	  parentElement ( NULL )
{
}
364
365 XMLElement::~XMLElement()
366 {
367 size_t i;
368 for ( i = 0; i < attributes.size(); i++ )
369 delete attributes[i];
370 for ( i = 0; i < subElements.size(); i++ )
371 delete subElements[i];
372 }
373
// Appends e to this element's sub-elements and makes this element its
// parent. The destructor deletes everything in subElements, so e is
// owned by this element from here on.
void
XMLElement::AddSubElement ( XMLElement* e )
{
	subElements.push_back ( e );
	e->parentElement = this;
}
380
381 // Parse()
382 // This function takes a single xml tag ( i.e. beginning with '<' and
383 // ending with '>', and parses out it's tag name and constituent
384 // attributes.
385 // Return Value: returns true if you need to look for a </tag> for
386 // the one it just parsed...
387 bool
388 XMLElement::Parse(const string& token,
389 bool& end_tag)
390 {
391 const char* p = token.c_str();
392 assert ( *p == '<' );
393 ++p;
394 p += strspn ( p, WS );
395
396 // check if this is a comment
397 if ( !strncmp ( p, "!--", 3 ) )
398 {
399 name = "!--";
400 end_tag = false;
401 return false; // never look for end tag to a comment
402 }
403
404 end_tag = ( *p == '/' );
405 if ( end_tag )
406 {
407 ++p;
408 p += strspn ( p, WS );
409 }
410 const char* end = strpbrk ( p, WS );
411 if ( !end )
412 {
413 end = strpbrk ( p, "/>" );
414 assert ( end );
415 }
416 name = string ( p, end-p );
417 p = end;
418 p += strspn ( p, WS );
419 while ( *p != '>' && *p != '/' )
420 {
421 end = strpbrk ( p, WSEQ );
422 if ( !end )
423 {
424 end = strpbrk ( p, "/>" );
425 assert ( end );
426 }
427 string attribute ( p, end-p ), value;
428 p = end;
429 p += strspn ( p, WS );
430 if ( *p == '=' )
431 {
432 ++p;
433 p += strspn ( p, WS );
434 char quote = 0;
435 if ( strchr ( "\"'", *p ) )
436 {
437 quote = *p++;
438 end = strchr ( p, quote );
439 }
440 else
441 {
442 end = strpbrk ( p, WS );
443 }
444 if ( !end )
445 {
446 end = strchr ( p, '>' );
447 assert(end);
448 if ( end[-1] == '/' )
449 end--;
450 }
451 value = string ( p, end-p );
452 p = end;
453 if ( quote && *p == quote )
454 p++;
455 p += strspn ( p, WS );
456 }
457 else if ( name[0] != '!' )
458 {
459 throw XMLSyntaxErrorException ( location,
460 "attributes must have values" );
461 }
462 attributes.push_back ( new XMLAttribute ( attribute, value ) );
463 }
464 return !( *p == '/' ) && !end_tag;
465 }
466
467 XMLAttribute*
468 XMLElement::GetAttribute ( const string& attribute,
469 bool required )
470 {
471 // this would be faster with a tree-based container, but our attribute
472 // lists are likely to stay so short as to not be an issue.
473 for ( size_t i = 0; i < attributes.size(); i++ )
474 {
475 if ( attribute == attributes[i]->name )
476 return attributes[i];
477 }
478 if ( required )
479 {
480 throw RequiredAttributeNotFoundException ( location,
481 attribute,
482 name );
483 }
484 return NULL;
485 }
486
487 const XMLAttribute*
488 XMLElement::GetAttribute ( const string& attribute,
489 bool required ) const
490 {
491 // this would be faster with a tree-based container, but our attribute
492 // lists are likely to stay so short as to not be an issue.
493 for ( size_t i = 0; i < attributes.size(); i++ )
494 {
495 if ( attribute == attributes[i]->name )
496 return attributes[i];
497 }
498 if ( required )
499 {
500 throw RequiredAttributeNotFoundException ( location,
501 attribute,
502 name );
503 }
504 return NULL;
505 }
506
507 // XMLParse()
508 // This function reads a "token" from the file loaded in XMLFile
509 // if it finds a tag that is non-singular, it parses sub-elements and/or
510 // inner text into the XMLElement that it is building to return.
511 // Return Value: an XMLElement allocated via the new operator that contains
512 // it's parsed data. Keep calling this function until it returns NULL
513 // (no more data)
514 XMLElement*
515 XMLParse ( XMLFile& f,
516 XMLIncludes* includes,
517 const Path& path,
518 bool* pend_tag = NULL )
519 {
520 string token;
521 if ( !f.get_token(token) )
522 return NULL;
523 bool end_tag, is_include = false;
524
525 while ( token[0] != '<'
526 || !strncmp ( token.c_str (), "<!--", 4 )
527 || !strncmp ( token.c_str (), "<?", 2 ) )
528 {
529 if ( token[0] != '<' )
530 throw XMLSyntaxErrorException ( f.Location (),
531 "expecting xml tag, not '%s'",
532 token.c_str () );
533 if ( !f.get_token(token) )
534 return NULL;
535 }
536
537 XMLElement* e = new XMLElement ( &f,
538 f.Location () );
539 bool bNeedEnd = e->Parse ( token, end_tag );
540
541 if ( e->name == "xi:include" && includes )
542 {
543 XMLAttribute* att;
544 att = e->GetAttribute ( "href", true );
545 assert ( att );
546 string includeFile ( path.Fixup ( att->value, true ) );
547 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
548 includes->push_back ( new XMLInclude ( e, path, topIncludeFile ) );
549 is_include = true;
550 }
551
552 if ( !bNeedEnd )
553 {
554 if ( pend_tag )
555 *pend_tag = end_tag;
556 else if ( end_tag )
557 {
558 delete e;
559 throw XMLSyntaxErrorException ( f.Location (),
560 "end tag '%s' not expected",
561 token.c_str() );
562 return NULL;
563 }
564 return e;
565 }
566 bool bThisMixingErrorReported = false;
567 while ( f.more_tokens () )
568 {
569 if ( f.next_is_text () )
570 {
571 if ( !f.get_token ( token ) || token.size () == 0 )
572 {
573 throw InvalidBuildFileException (
574 f.Location(),
575 "internal tool error - get_token() failed when more_tokens() returned true" );
576 break;
577 }
578 if ( e->subElements.size() && !bThisMixingErrorReported )
579 {
580 throw XMLSyntaxErrorException ( f.Location (),
581 "mixing of inner text with sub elements" );
582 bThisMixingErrorReported = true;
583 }
584 if ( strchr ( token.c_str (), '>' ) )
585 {
586 throw XMLSyntaxErrorException ( f.Location (),
587 "invalid symbol '>'" );
588 }
589 if ( e->value.size() > 0 )
590 {
591 throw XMLSyntaxErrorException ( f.Location (),
592 "multiple instances of inner text" );
593 e->value += " " + token;
594 }
595 else
596 e->value = token;
597 }
598 else
599 {
600 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
601 if ( !e2 )
602 {
603 throw InvalidBuildFileException (
604 e->location,
605 "end of file found looking for end tag" );
606 break;
607 }
608 if ( end_tag )
609 {
610 if ( e->name != e2->name )
611 {
612 delete e2;
613 throw XMLSyntaxErrorException ( f.Location (),
614 "end tag name mismatch" );
615 break;
616 }
617 delete e2;
618 break;
619 }
620 if ( e->value.size () > 0 && !bThisMixingErrorReported )
621 {
622 throw XMLSyntaxErrorException ( f.Location (),
623 "mixing of inner text with sub elements" );
624 bThisMixingErrorReported = true;
625 }
626 e->AddSubElement ( e2 );
627 }
628 }
629 return e;
630 }
631
632 void
633 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
634 {
635 for ( ;; )
636 {
637 XMLElement* e = XMLParse ( f, &includes, path );
638 if ( !e )
639 return;
640 head.AddSubElement ( e );
641 }
642 }
643
644 XMLElement*
645 XMLLoadInclude ( XMLInclude& include,
646 XMLIncludes& includes )
647 {
648 XMLAttribute* att;
649 att = include.e->GetAttribute("href", true);
650 assert(att);
651
652 string file ( include.path.Fixup(att->value, true) );
653 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
654 include.e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
655 XMLFile* fInc = new XMLFile();
656 if ( !fInc->open ( file ) )
657 {
658 include.fileExists = false;
659 // look for xi:fallback element
660 for ( size_t i = 0; i < include.e->subElements.size (); i++ )
661 {
662 XMLElement* e2 = include.e->subElements[i];
663 if ( e2->name == "xi:fallback" )
664 {
665 // now look for xi:include below...
666 for ( i = 0; i < e2->subElements.size (); i++ )
667 {
668 XMLElement* e3 = e2->subElements[i];
669 if ( e3->name == "xi:include" )
670 {
671 att = e3->GetAttribute ( "href", true );
672 assert ( att );
673 string includeFile ( include.path.Fixup ( att->value, true ) );
674 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
675 XMLInclude* fallbackInclude = new XMLInclude ( e3, include.path, topIncludeFile );
676 return XMLLoadInclude ( *fallbackInclude, includes );
677 }
678 }
679 throw InvalidBuildFileException (
680 e2->location,
681 "<xi:fallback> must have a <xi:include> sub-element" );
682 return NULL;
683 }
684 }
685 return NULL;
686 }
687 else
688 {
689 include.fileExists = true;
690 XMLElement* new_e = new XMLElement ( fInc,
691 include.e->location );
692 new_e->name = "xi:included";
693 Path path2 ( include.path, att->value );
694 XMLReadFile ( *fInc, *new_e, includes, path2 );
695 return new_e;
696 }
697 }
698
699 XMLElement*
700 XMLLoadFile ( const string& filename,
701 const Path& path,
702 XMLIncludes& includes )
703 {
704 XMLFile* f = new XMLFile();
705
706 if ( !f->open ( filename ) )
707 throw FileNotFoundException ( filename );
708
709 XMLElement* head = new XMLElement ( f,
710 "(virtual)" );
711
712 XMLReadFile ( *f, *head, includes, path );
713
714 for ( size_t i = 0; i < includes.size (); i++ )
715 {
716 XMLElement* e = includes[i]->e;
717 XMLElement* e2 = XMLLoadInclude ( *includes[i], includes );
718 if ( !e2 )
719 {
720 throw FileNotFoundException (
721 ssprintf ( "%s (referenced from %s)",
722 e->GetAttribute ( "top_href", true )->value.c_str (),
723 f->Location ().c_str () ) );
724 }
725 XMLElement* parent = e->parentElement;
726 XMLElement** parent_container = NULL;
727 if ( !parent )
728 {
729 delete e;
730 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
731 return NULL;
732 }
733 for ( size_t j = 0; j < parent->subElements.size (); j++ )
734 {
735 if ( parent->subElements[j] == e )
736 {
737 parent_container = &parent->subElements[j];
738 break;
739 }
740 }
741 if ( !parent_container )
742 {
743 delete e;
744 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
745 return NULL;
746 }
747 // replace inclusion tree with the imported tree
748 e2->parentElement = e->parentElement;
749 e2->name = e->name;
750 e2->attributes = e->attributes;
751 *parent_container = e2;
752 e->attributes.resize ( 0 );
753 delete e;
754 }
755 return head;
756 }