96547562496242155df92f9673258828e4be9eea
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 /*
2 * Copyright (C) 2005 Casper S. Hornstrup
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 #include "pch.h"
19
20 #ifdef _MSC_VER
21 #define MAX_PATH _MAX_PATH
22 #endif
23
24 #ifdef WIN32
25 # include <direct.h>
26 # include <io.h>
27 #else
28 # include <sys/stat.h>
29 # define MAX_PATH PATH_MAX
30 #endif
31 #include <assert.h>
32
33 #include "XML.h"
34 #include "exception.h"
35 #include "ssprintf.h"
36
37 using std::string;
38 using std::vector;
39
40 #ifdef WIN32
41 #define getcwd _getcwd
42 #endif//WIN32
43
// characters treated as whitespace by the tokenizer
static const char* WS = " \t\r\n";
// whitespace plus '=', used to find the end of an attribute name
static const char* WSEQ = " =\t\r\n";

// current directory of the process; filled in by InitWorkingDirectory()
std::string working_directory;
48
49 XMLIncludes::~XMLIncludes()
50 {
51 for ( size_t i = 0; i < this->size(); i++ )
52 delete (*this)[i];
53 }
54
55 void
56 InitWorkingDirectory()
57 {
58 // store the current directory for path calculations
59 working_directory.resize ( _MAX_PATH );
60 working_directory[0] = 0;
61 getcwd ( &working_directory[0], working_directory.size() );
62 working_directory.resize ( strlen ( working_directory.c_str() ) );
63 }
64
// Returns the size, in bytes, of the file behind the stream f.
// The non-WIN32 branch returns 0 when fstat64() fails.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#else
	struct stat64 info;
	const int rc = fstat64 ( fileno(f), &info );
	return ( rc == 0 ) ? info.st_size : 0;
#endif
}
81
82 Path::Path()
83 {
84 if ( !working_directory.size() )
85 InitWorkingDirectory();
86 string s ( working_directory );
87 const char* p = strtok ( &s[0], "/\\" );
88 while ( p )
89 {
90 if ( *p )
91 path.push_back ( p );
92 p = strtok ( NULL, "/\\" );
93 }
94 }
95
96 Path::Path ( const Path& cwd, const string& file )
97 {
98 string s ( cwd.Fixup ( file, false ) );
99 const char* p = strtok ( &s[0], "/\\" );
100 while ( p )
101 {
102 if ( *p )
103 path.push_back ( p );
104 p = strtok ( NULL, "/\\" );
105 }
106 }
107
108 string
109 Path::Fixup ( const string& file, bool include_filename ) const
110 {
111 if ( strchr ( "/\\", file[0] )
112 #ifdef WIN32
113 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
114 || file[1] == ':'
115 #endif//WIN32
116 )
117 {
118 return file;
119 }
120 vector<string> pathtmp ( path );
121 string tmp ( file );
122 const char* prev = strtok ( &tmp[0], "/\\" );
123 const char* p = strtok ( NULL, "/\\" );
124 while ( p )
125 {
126 if ( !strcmp ( prev, "." ) )
127 ; // do nothing
128 else if ( !strcmp ( prev, ".." ) )
129 {
130 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
131 #ifdef WIN32
132 if ( pathtmp.size() > 1 )
133 #else
134 if ( pathtmp.size() )
135 #endif
136 pathtmp.resize ( pathtmp.size() - 1 );
137 }
138 else
139 pathtmp.push_back ( prev );
140 prev = p;
141 p = strtok ( NULL, "/\\" );
142 }
143 if ( include_filename )
144 pathtmp.push_back ( prev );
145
146 // reuse tmp variable to return recombined path
147 tmp.resize(0);
148 for ( size_t i = 0; i < pathtmp.size(); i++ )
149 {
150 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
151 #ifdef WIN32
152 if ( i ) tmp += "/";
153 #else
154 tmp += "/";
155 #endif
156 tmp += pathtmp[i];
157 }
158 return tmp;
159 }
160
161 string
162 Path::RelativeFromWorkingDirectory ()
163 {
164 string out = "";
165 for ( size_t i = 0; i < path.size(); i++ )
166 {
167 out += "/" + path[i];
168 }
169 return RelativeFromWorkingDirectory ( out );
170 }
171
172 string
173 Path::RelativeFromWorkingDirectory ( const string& path )
174 {
175 return Path::RelativeFromDirectory ( path, working_directory );
176 }
177
178 string
179 Path::RelativeFromDirectory ( const string& path, const string& base_directory )
180 {
181 vector<string> vbase, vpath, vout;
182 Path::Split ( vbase, base_directory, true );
183 Path::Split ( vpath, path, true );
184 #ifdef WIN32
185 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
186 // not possible to do relative across different drive letters
187 if ( vbase[0] != vpath[0] )
188 return path;
189 #endif
190 size_t i = 0;
191 while ( i < vbase.size() && i < vpath.size() && vbase[i] == vpath[i] )
192 ++i;
193 if ( i < vbase.size() )
194 {
195 // path goes above our base directory, we will need some ..'s
196 for ( size_t j = 0; j < i; j++ )
197 vout.push_back ( ".." );
198 }
199 while ( i < vpath.size() )
200 vout.push_back ( vpath[i++] );
201
202 // now merge vout into a string again
203 string out = vout[0];
204 for ( i = 1; i < vout.size(); i++ )
205 {
206 out += "/" + vout[i];
207 }
208 return out;
209 }
210
211 void
212 Path::Split ( vector<string>& out,
213 const string& path,
214 bool include_last )
215 {
216 string s ( path );
217 const char* prev = strtok ( &s[0], "/\\" );
218 const char* p = strtok ( NULL, "/\\" );
219 out.resize ( 0 );
220 while ( p )
221 {
222 out.push_back ( prev );
223 prev = p;
224 p = strtok ( NULL, "/\\" );
225 }
226 if ( include_last )
227 out.push_back ( prev );
228 }
229
230 XMLFile::XMLFile()
231 {
232 }
233
234 void
235 XMLFile::close()
236 {
237 _buf.resize(0);
238 _p = _end = NULL;
239 }
240
241 bool
242 XMLFile::open(const string& filename_)
243 {
244 close();
245 FILE* f = fopen ( filename_.c_str(), "rb" );
246 if ( !f )
247 return false;
248 unsigned long len = (unsigned long)filelen(f);
249 _buf.resize ( len );
250 fread ( &_buf[0], 1, len, f );
251 fclose ( f );
252 _p = _buf.c_str();
253 _end = _p + len;
254 _filename = filename_;
255 next_token();
256 return true;
257 }
258
259 // next_token() moves the pointer to next token, which may be
260 // an xml element or a text element, basically it's a glorified
261 // skipspace, normally the user of this class won't need to call
262 // this function
263 void
264 XMLFile::next_token()
265 {
266 _p += strspn ( _p, WS );
267 }
268
269 bool
270 XMLFile::next_is_text()
271 {
272 return *_p != '<';
273 }
274
275 bool
276 XMLFile::more_tokens()
277 {
278 return _p != _end;
279 }
280
281 // get_token() is used to return a token, and move the pointer
282 // past the token
283 bool
284 XMLFile::get_token(string& token)
285 {
286 const char* tokend;
287 if ( !strncmp ( _p, "<!--", 4 ) )
288 {
289 tokend = strstr ( _p, "-->" );
290 if ( !tokend )
291 tokend = _end;
292 else
293 tokend += 3;
294 }
295 else if ( !strncmp ( _p, "<?", 2 ) )
296 {
297 tokend = strstr ( _p, "?>" );
298 if ( !tokend )
299 tokend = _end;
300 else
301 tokend += 2;
302 }
303 else if ( *_p == '<' )
304 {
305 tokend = strchr ( _p, '>' );
306 if ( !tokend )
307 tokend = _end;
308 else
309 ++tokend;
310 }
311 else
312 {
313 tokend = strchr ( _p, '<' );
314 if ( !tokend )
315 tokend = _end;
316 while ( tokend > _p && isspace(tokend[-1]) )
317 --tokend;
318 }
319 if ( tokend == _p )
320 return false;
321 token = string ( _p, tokend-_p );
322 _p = tokend;
323 next_token();
324 return true;
325 }
326
327 string
328 XMLFile::Location() const
329 {
330 int line = 1;
331 const char* p = strchr ( _buf.c_str(), '\n' );
332 while ( p && p < _p )
333 {
334 ++line;
335 p = strchr ( p+1, '\n' );
336 }
337 return ssprintf ( "%s(%i)",_filename.c_str(), line );
338 }
339
340 XMLAttribute::XMLAttribute()
341 {
342 }
343
344 XMLAttribute::XMLAttribute(const string& name_,
345 const string& value_)
346 : name(name_), value(value_)
347 {
348 }
349
350 XMLAttribute::XMLAttribute ( const XMLAttribute& src )
351 : name(src.name), value(src.value)
352 {
353
354 }
355
356 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
357 {
358 name = src.name;
359 value = src.value;
360 return *this;
361 }
362
363 XMLElement::XMLElement ( XMLFile* xmlFile,
364 const string& location )
365 : xmlFile ( xmlFile ),
366 location ( location ),
367 parentElement ( NULL )
368 {
369 }
370
371 XMLElement::~XMLElement()
372 {
373 size_t i;
374 for ( i = 0; i < attributes.size(); i++ )
375 delete attributes[i];
376 for ( i = 0; i < subElements.size(); i++ )
377 delete subElements[i];
378 }
379
380 void
381 XMLElement::AddSubElement ( XMLElement* e )
382 {
383 subElements.push_back ( e );
384 e->parentElement = this;
385 }
386
387 // Parse()
388 // This function takes a single xml tag ( i.e. beginning with '<' and
389 // ending with '>', and parses out it's tag name and constituent
390 // attributes.
391 // Return Value: returns true if you need to look for a </tag> for
392 // the one it just parsed...
393 bool
394 XMLElement::Parse(const string& token,
395 bool& end_tag)
396 {
397 const char* p = token.c_str();
398 assert ( *p == '<' );
399 ++p;
400 p += strspn ( p, WS );
401
402 // check if this is a comment
403 if ( !strncmp ( p, "!--", 3 ) )
404 {
405 name = "!--";
406 end_tag = false;
407 return false; // never look for end tag to a comment
408 }
409
410 end_tag = ( *p == '/' );
411 if ( end_tag )
412 {
413 ++p;
414 p += strspn ( p, WS );
415 }
416 const char* end = strpbrk ( p, WS );
417 if ( !end )
418 {
419 end = strpbrk ( p, "/>" );
420 assert ( end );
421 }
422 name = string ( p, end-p );
423 p = end;
424 p += strspn ( p, WS );
425 while ( *p != '>' && *p != '/' )
426 {
427 end = strpbrk ( p, WSEQ );
428 if ( !end )
429 {
430 end = strpbrk ( p, "/>" );
431 assert ( end );
432 }
433 string attribute ( p, end-p ), value;
434 p = end;
435 p += strspn ( p, WS );
436 if ( *p == '=' )
437 {
438 ++p;
439 p += strspn ( p, WS );
440 char quote = 0;
441 if ( strchr ( "\"'", *p ) )
442 {
443 quote = *p++;
444 end = strchr ( p, quote );
445 }
446 else
447 {
448 end = strpbrk ( p, WS );
449 }
450 if ( !end )
451 {
452 end = strchr ( p, '>' );
453 assert(end);
454 if ( end[-1] == '/' )
455 end--;
456 }
457 value = string ( p, end-p );
458 p = end;
459 if ( quote && *p == quote )
460 p++;
461 p += strspn ( p, WS );
462 }
463 else if ( name[0] != '!' )
464 {
465 throw XMLSyntaxErrorException ( location,
466 "attributes must have values" );
467 }
468 attributes.push_back ( new XMLAttribute ( attribute, value ) );
469 }
470 return !( *p == '/' ) && !end_tag;
471 }
472
473 XMLAttribute*
474 XMLElement::GetAttribute ( const string& attribute,
475 bool required )
476 {
477 // this would be faster with a tree-based container, but our attribute
478 // lists are likely to stay so short as to not be an issue.
479 for ( size_t i = 0; i < attributes.size(); i++ )
480 {
481 if ( attribute == attributes[i]->name )
482 return attributes[i];
483 }
484 if ( required )
485 {
486 throw RequiredAttributeNotFoundException ( location,
487 attribute,
488 name );
489 }
490 return NULL;
491 }
492
493 const XMLAttribute*
494 XMLElement::GetAttribute ( const string& attribute,
495 bool required ) const
496 {
497 // this would be faster with a tree-based container, but our attribute
498 // lists are likely to stay so short as to not be an issue.
499 for ( size_t i = 0; i < attributes.size(); i++ )
500 {
501 if ( attribute == attributes[i]->name )
502 return attributes[i];
503 }
504 if ( required )
505 {
506 throw RequiredAttributeNotFoundException ( location,
507 attribute,
508 name );
509 }
510 return NULL;
511 }
512
513 // XMLParse()
514 // This function reads a "token" from the file loaded in XMLFile
515 // if it finds a tag that is non-singular, it parses sub-elements and/or
516 // inner text into the XMLElement that it is building to return.
517 // Return Value: an XMLElement allocated via the new operator that contains
518 // it's parsed data. Keep calling this function until it returns NULL
519 // (no more data)
520 XMLElement*
521 XMLParse ( XMLFile& f,
522 XMLIncludes* includes,
523 const Path& path,
524 bool* pend_tag = NULL )
525 {
526 string token;
527 if ( !f.get_token(token) )
528 return NULL;
529 bool end_tag, is_include = false;
530
531 while ( token[0] != '<'
532 || !strncmp ( token.c_str (), "<!--", 4 )
533 || !strncmp ( token.c_str (), "<?", 2 ) )
534 {
535 if ( token[0] != '<' )
536 throw XMLSyntaxErrorException ( f.Location (),
537 "expecting xml tag, not '%s'",
538 token.c_str () );
539 if ( !f.get_token(token) )
540 return NULL;
541 }
542
543 XMLElement* e = new XMLElement ( &f,
544 f.Location () );
545 bool bNeedEnd = e->Parse ( token, end_tag );
546
547 if ( e->name == "xi:include" && includes )
548 {
549 XMLAttribute* att;
550 att = e->GetAttribute ( "href", true );
551 assert ( att );
552 string includeFile ( path.Fixup ( att->value, true ) );
553 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
554 includes->push_back ( new XMLInclude ( e, path, topIncludeFile ) );
555 is_include = true;
556 }
557
558 if ( !bNeedEnd )
559 {
560 if ( pend_tag )
561 *pend_tag = end_tag;
562 else if ( end_tag )
563 {
564 delete e;
565 throw XMLSyntaxErrorException ( f.Location (),
566 "end tag '%s' not expected",
567 token.c_str() );
568 return NULL;
569 }
570 return e;
571 }
572 bool bThisMixingErrorReported = false;
573 while ( f.more_tokens () )
574 {
575 if ( f.next_is_text () )
576 {
577 if ( !f.get_token ( token ) || token.size () == 0 )
578 {
579 throw InvalidBuildFileException (
580 f.Location(),
581 "internal tool error - get_token() failed when more_tokens() returned true" );
582 break;
583 }
584 if ( e->subElements.size() && !bThisMixingErrorReported )
585 {
586 throw XMLSyntaxErrorException ( f.Location (),
587 "mixing of inner text with sub elements" );
588 bThisMixingErrorReported = true;
589 }
590 if ( strchr ( token.c_str (), '>' ) )
591 {
592 throw XMLSyntaxErrorException ( f.Location (),
593 "invalid symbol '>'" );
594 }
595 if ( e->value.size() > 0 )
596 {
597 throw XMLSyntaxErrorException ( f.Location (),
598 "multiple instances of inner text" );
599 e->value += " " + token;
600 }
601 else
602 e->value = token;
603 }
604 else
605 {
606 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
607 if ( !e2 )
608 {
609 throw InvalidBuildFileException (
610 e->location,
611 "end of file found looking for end tag" );
612 break;
613 }
614 if ( end_tag )
615 {
616 if ( e->name != e2->name )
617 {
618 delete e2;
619 throw XMLSyntaxErrorException ( f.Location (),
620 "end tag name mismatch" );
621 break;
622 }
623 delete e2;
624 break;
625 }
626 if ( e->value.size () > 0 && !bThisMixingErrorReported )
627 {
628 throw XMLSyntaxErrorException ( f.Location (),
629 "mixing of inner text with sub elements" );
630 bThisMixingErrorReported = true;
631 }
632 e->AddSubElement ( e2 );
633 }
634 }
635 return e;
636 }
637
638 void
639 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
640 {
641 for ( ;; )
642 {
643 XMLElement* e = XMLParse ( f, &includes, path );
644 if ( !e )
645 return;
646 head.AddSubElement ( e );
647 }
648 }
649
650 XMLElement*
651 XMLLoadInclude ( XMLInclude& include,
652 XMLIncludes& includes )
653 {
654 XMLAttribute* att;
655 att = include.e->GetAttribute("href", true);
656 assert(att);
657
658 string file ( include.path.Fixup(att->value, true) );
659 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
660 include.e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
661 XMLFile* fInc = new XMLFile();
662 if ( !fInc->open ( file ) )
663 {
664 include.fileExists = false;
665 // look for xi:fallback element
666 for ( size_t i = 0; i < include.e->subElements.size (); i++ )
667 {
668 XMLElement* e2 = include.e->subElements[i];
669 if ( e2->name == "xi:fallback" )
670 {
671 // now look for xi:include below...
672 for ( i = 0; i < e2->subElements.size (); i++ )
673 {
674 XMLElement* e3 = e2->subElements[i];
675 if ( e3->name == "xi:include" )
676 {
677 att = e3->GetAttribute ( "href", true );
678 assert ( att );
679 string includeFile ( include.path.Fixup ( att->value, true ) );
680 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
681 XMLInclude* fallbackInclude = new XMLInclude ( e3, include.path, topIncludeFile );
682 return XMLLoadInclude ( *fallbackInclude, includes );
683 }
684 }
685 throw InvalidBuildFileException (
686 e2->location,
687 "<xi:fallback> must have a <xi:include> sub-element" );
688 return NULL;
689 }
690 }
691 return NULL;
692 }
693 else
694 {
695 include.fileExists = true;
696 XMLElement* new_e = new XMLElement ( fInc,
697 include.e->location );
698 new_e->name = "xi:included";
699 Path path2 ( include.path, att->value );
700 XMLReadFile ( *fInc, *new_e, includes, path2 );
701 return new_e;
702 }
703 }
704
705 XMLElement*
706 XMLLoadFile ( const string& filename,
707 const Path& path,
708 XMLIncludes& includes )
709 {
710 XMLFile* f = new XMLFile();
711
712 if ( !f->open ( filename ) )
713 throw FileNotFoundException ( filename );
714
715 XMLElement* head = new XMLElement ( f,
716 "(virtual)" );
717
718 XMLReadFile ( *f, *head, includes, path );
719
720 for ( size_t i = 0; i < includes.size (); i++ )
721 {
722 XMLElement* e = includes[i]->e;
723 XMLElement* e2 = XMLLoadInclude ( *includes[i], includes );
724 if ( !e2 )
725 {
726 throw FileNotFoundException (
727 ssprintf ( "%s (referenced from %s)",
728 e->GetAttribute ( "top_href", true )->value.c_str (),
729 f->Location ().c_str () ) );
730 }
731 XMLElement* parent = e->parentElement;
732 XMLElement** parent_container = NULL;
733 if ( !parent )
734 {
735 delete e;
736 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
737 return NULL;
738 }
739 for ( size_t j = 0; j < parent->subElements.size (); j++ )
740 {
741 if ( parent->subElements[j] == e )
742 {
743 parent_container = &parent->subElements[j];
744 break;
745 }
746 }
747 if ( !parent_container )
748 {
749 delete e;
750 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
751 return NULL;
752 }
753 // replace inclusion tree with the imported tree
754 e2->parentElement = e->parentElement;
755 e2->name = e->name;
756 e2->attributes = e->attributes;
757 *parent_container = e2;
758 e->attributes.resize ( 0 );
759 delete e;
760 }
761 return head;
762 }