074af9e299d1d7f14b46c4a6704ddff9ec4c30db
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 /*
2 * Copyright (C) 2005 Casper S. Hornstrup
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 #include "pch.h"
19
20 #ifndef MAX_PATH
21 #define MAX_PATH _MAX_PATH
22 #endif
23
24 #ifdef WIN32
25 # include <direct.h>
26 # include <io.h>
27 #else
28 # include <sys/stat.h>
29 # define MAX_PATH PATH_MAX
30 #endif
31 #include <assert.h>
32
33 #include "XML.h"
34 #include "exception.h"
35 #include "ssprintf.h"
36
37 using std::string;
38 using std::vector;
39
40 #ifdef WIN32
41 #define getcwd _getcwd
42 #endif//WIN32
43
// Characters treated as whitespace when skipping between tokens.
static const char* const WS = " \t\r\n";
// Whitespace plus '=': the characters that terminate an attribute name.
static const char* const WSEQ = " =\t\r\n";

// Directory the tool was started in; filled lazily by InitWorkingDirectory().
std::string working_directory;
48
49 XMLIncludes::~XMLIncludes()
50 {
51 for ( size_t i = 0; i < this->size(); i++ )
52 delete (*this)[i];
53 }
54
55 void
56 InitWorkingDirectory()
57 {
58 // store the current directory for path calculations
59 working_directory.resize ( _MAX_PATH );
60 working_directory[0] = 0;
61 getcwd ( &working_directory[0], working_directory.size() );
62 working_directory.resize ( strlen ( working_directory.c_str() ) );
63 }
64
// Returns the size in bytes of the file behind the stream f.
// On non-WIN32 builds, 0 is returned when the file cannot be stat'ed.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#else
	struct stat64 info;
	const int rc = fstat64 ( fileno(f), &info );
	if ( rc == 0 )
		return info.st_size;
	return 0;
#endif
}
81
82 Path::Path()
83 {
84 if ( !working_directory.size() )
85 InitWorkingDirectory();
86 string s ( working_directory );
87 const char* p = strtok ( &s[0], "/\\" );
88 while ( p )
89 {
90 if ( *p )
91 path.push_back ( p );
92 p = strtok ( NULL, "/\\" );
93 }
94 }
95
96 Path::Path ( const Path& cwd, const string& file )
97 {
98 string s ( cwd.Fixup ( file, false ) );
99 const char* p = strtok ( &s[0], "/\\" );
100 while ( p )
101 {
102 if ( *p )
103 path.push_back ( p );
104 p = strtok ( NULL, "/\\" );
105 }
106 }
107
108 string
109 Path::Fixup ( const string& file, bool include_filename ) const
110 {
111 if ( strchr ( "/\\", file[0] )
112 #ifdef WIN32
113 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
114 || file[1] == ':'
115 #endif//WIN32
116 )
117 {
118 return file;
119 }
120 vector<string> pathtmp ( path );
121 string tmp ( file );
122 const char* prev = strtok ( &tmp[0], "/\\" );
123 const char* p = strtok ( NULL, "/\\" );
124 while ( p )
125 {
126 if ( !strcmp ( prev, "." ) )
127 ; // do nothing
128 else if ( !strcmp ( prev, ".." ) )
129 {
130 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
131 #ifdef WIN32
132 if ( pathtmp.size() > 1 )
133 #else
134 if ( pathtmp.size() )
135 #endif
136 pathtmp.resize ( pathtmp.size() - 1 );
137 }
138 else
139 pathtmp.push_back ( prev );
140 prev = p;
141 p = strtok ( NULL, "/\\" );
142 }
143 if ( include_filename )
144 pathtmp.push_back ( prev );
145
146 // reuse tmp variable to return recombined path
147 tmp.resize(0);
148 for ( size_t i = 0; i < pathtmp.size(); i++ )
149 {
150 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
151 #ifdef WIN32
152 if ( i ) tmp += "/";
153 #else
154 tmp += "/";
155 #endif
156 tmp += pathtmp[i];
157 }
158 return tmp;
159 }
160
161 string
162 Path::RelativeFromWorkingDirectory ()
163 {
164 string out = "";
165 for ( size_t i = 0; i < path.size(); i++ )
166 {
167 out += "/" + path[i];
168 }
169 return RelativeFromWorkingDirectory ( out );
170 }
171
172 string
173 Path::RelativeFromWorkingDirectory ( const string& path )
174 {
175 return Path::RelativeFromDirectory ( path, working_directory );
176 }
177
178 string
179 Path::RelativeFromDirectory (
180 const string& path,
181 const string& base_directory )
182 {
183 vector<string> vbase, vpath, vout;
184 Path::Split ( vbase, base_directory, true );
185 Path::Split ( vpath, path, true );
186 #ifdef WIN32
187 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
188 // not possible to do relative across different drive letters
189 {
190 char path_driveletter = (path[1] == ':') ? toupper(path[0]) : 0;
191 char base_driveletter = (base_directory[1] == ':') ? toupper(base_directory[0]) : 0;
192 if ( path_driveletter != base_driveletter )
193 return path;
194 }
195 #endif
196 size_t i = 0;
197 while ( i < vbase.size() && i < vpath.size() && vbase[i] == vpath[i] )
198 ++i;
199
200 // did we go through all of the path?
201 if ( vbase.size() == vpath.size() && i == vpath.size() )
202 return ".";
203
204 if ( i < vbase.size() )
205 {
206 // path goes above our base directory, we will need some ..'s
207 for ( size_t j = i; j < vbase.size(); j++ )
208 vout.push_back ( ".." );
209 }
210
211 while ( i < vpath.size() )
212 vout.push_back ( vpath[i++] );
213
214 // now merge vout into a string again
215 string out = vout[0];
216 for ( i = 1; i < vout.size(); i++ )
217 {
218 out += "/" + vout[i];
219 }
220 return out;
221 }
222
223 void
224 Path::Split ( vector<string>& out,
225 const string& path,
226 bool include_last )
227 {
228 string s ( path );
229 const char* prev = strtok ( &s[0], "/\\" );
230 const char* p = strtok ( NULL, "/\\" );
231 out.resize ( 0 );
232 while ( p )
233 {
234 if ( strcmp ( prev, "." ) )
235 out.push_back ( prev );
236 prev = p;
237 p = strtok ( NULL, "/\\" );
238 }
239 if ( include_last && strcmp ( prev, "." ) )
240 out.push_back ( prev );
241 // special-case where path only has "."
242 // don't move this check up higher as it might miss
243 // some funny paths...
244 if ( !out.size() && !strcmp ( prev, "." ) )
245 out.push_back ( "." );
246 }
247
248 XMLFile::XMLFile()
249 {
250 }
251
252 void
253 XMLFile::close()
254 {
255 _buf.resize(0);
256 _p = _end = NULL;
257 }
258
259 bool
260 XMLFile::open(const string& filename_)
261 {
262 close();
263 FILE* f = fopen ( filename_.c_str(), "rb" );
264 if ( !f )
265 return false;
266 unsigned long len = (unsigned long)filelen(f);
267 _buf.resize ( len );
268 fread ( &_buf[0], 1, len, f );
269 fclose ( f );
270 _p = _buf.c_str();
271 _end = _p + len;
272 _filename = filename_;
273 next_token();
274 return true;
275 }
276
277 // next_token() moves the pointer to next token, which may be
278 // an xml element or a text element, basically it's a glorified
279 // skipspace, normally the user of this class won't need to call
280 // this function
281 void
282 XMLFile::next_token()
283 {
284 _p += strspn ( _p, WS );
285 }
286
287 bool
288 XMLFile::next_is_text()
289 {
290 return *_p != '<';
291 }
292
293 bool
294 XMLFile::more_tokens()
295 {
296 return _p != _end;
297 }
298
299 // get_token() is used to return a token, and move the pointer
300 // past the token
301 bool
302 XMLFile::get_token(string& token)
303 {
304 const char* tokend;
305 if ( !strncmp ( _p, "<!--", 4 ) )
306 {
307 tokend = strstr ( _p, "-->" );
308 if ( !tokend )
309 tokend = _end;
310 else
311 tokend += 3;
312 }
313 else if ( !strncmp ( _p, "<?", 2 ) )
314 {
315 tokend = strstr ( _p, "?>" );
316 if ( !tokend )
317 tokend = _end;
318 else
319 tokend += 2;
320 }
321 else if ( *_p == '<' )
322 {
323 tokend = strchr ( _p, '>' );
324 if ( !tokend )
325 tokend = _end;
326 else
327 ++tokend;
328 }
329 else
330 {
331 tokend = strchr ( _p, '<' );
332 if ( !tokend )
333 tokend = _end;
334 while ( tokend > _p && isspace(tokend[-1]) )
335 --tokend;
336 }
337 if ( tokend == _p )
338 return false;
339 token = string ( _p, tokend-_p );
340 _p = tokend;
341 next_token();
342 return true;
343 }
344
345 string
346 XMLFile::Location() const
347 {
348 int line = 1;
349 const char* p = strchr ( _buf.c_str(), '\n' );
350 while ( p && p < _p )
351 {
352 ++line;
353 p = strchr ( p+1, '\n' );
354 }
355 return ssprintf ( "%s(%i)",_filename.c_str(), line );
356 }
357
358 XMLAttribute::XMLAttribute()
359 {
360 }
361
362 XMLAttribute::XMLAttribute(const string& name_,
363 const string& value_)
364 : name(name_), value(value_)
365 {
366 }
367
368 XMLAttribute::XMLAttribute ( const XMLAttribute& src )
369 : name(src.name), value(src.value)
370 {
371
372 }
373
374 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
375 {
376 name = src.name;
377 value = src.value;
378 return *this;
379 }
380
381 XMLElement::XMLElement ( XMLFile* xmlFile,
382 const string& location )
383 : xmlFile ( xmlFile ),
384 location ( location ),
385 parentElement ( NULL )
386 {
387 }
388
389 XMLElement::~XMLElement()
390 {
391 size_t i;
392 for ( i = 0; i < attributes.size(); i++ )
393 delete attributes[i];
394 for ( i = 0; i < subElements.size(); i++ )
395 delete subElements[i];
396 }
397
398 void
399 XMLElement::AddSubElement ( XMLElement* e )
400 {
401 subElements.push_back ( e );
402 e->parentElement = this;
403 }
404
405 // Parse()
406 // This function takes a single xml tag ( i.e. beginning with '<' and
407 // ending with '>', and parses out it's tag name and constituent
408 // attributes.
409 // Return Value: returns true if you need to look for a </tag> for
410 // the one it just parsed...
411 bool
412 XMLElement::Parse(const string& token,
413 bool& end_tag)
414 {
415 const char* p = token.c_str();
416 assert ( *p == '<' );
417 ++p;
418 p += strspn ( p, WS );
419
420 // check if this is a comment
421 if ( !strncmp ( p, "!--", 3 ) )
422 {
423 name = "!--";
424 end_tag = false;
425 return false; // never look for end tag to a comment
426 }
427
428 end_tag = ( *p == '/' );
429 if ( end_tag )
430 {
431 ++p;
432 p += strspn ( p, WS );
433 }
434 const char* end = strpbrk ( p, WS );
435 if ( !end )
436 {
437 end = strpbrk ( p, "/>" );
438 assert ( end );
439 }
440 name = string ( p, end-p );
441 p = end;
442 p += strspn ( p, WS );
443 while ( *p != '>' && *p != '/' )
444 {
445 end = strpbrk ( p, WSEQ );
446 if ( !end )
447 {
448 end = strpbrk ( p, "/>" );
449 assert ( end );
450 }
451 string attribute ( p, end-p ), value;
452 p = end;
453 p += strspn ( p, WS );
454 if ( *p == '=' )
455 {
456 ++p;
457 p += strspn ( p, WS );
458 char quote = 0;
459 if ( strchr ( "\"'", *p ) )
460 {
461 quote = *p++;
462 end = strchr ( p, quote );
463 }
464 else
465 {
466 end = strpbrk ( p, WS );
467 }
468 if ( !end )
469 {
470 end = strchr ( p, '>' );
471 assert(end);
472 if ( end[-1] == '/' )
473 end--;
474 }
475 value = string ( p, end-p );
476 p = end;
477 if ( quote && *p == quote )
478 p++;
479 p += strspn ( p, WS );
480 }
481 else if ( name[0] != '!' )
482 {
483 throw XMLSyntaxErrorException ( location,
484 "attributes must have values" );
485 }
486 attributes.push_back ( new XMLAttribute ( attribute, value ) );
487 }
488 return !( *p == '/' ) && !end_tag;
489 }
490
491 XMLAttribute*
492 XMLElement::GetAttribute ( const string& attribute,
493 bool required )
494 {
495 // this would be faster with a tree-based container, but our attribute
496 // lists are likely to stay so short as to not be an issue.
497 for ( size_t i = 0; i < attributes.size(); i++ )
498 {
499 if ( attribute == attributes[i]->name )
500 return attributes[i];
501 }
502 if ( required )
503 {
504 throw RequiredAttributeNotFoundException ( location,
505 attribute,
506 name );
507 }
508 return NULL;
509 }
510
511 const XMLAttribute*
512 XMLElement::GetAttribute ( const string& attribute,
513 bool required ) const
514 {
515 // this would be faster with a tree-based container, but our attribute
516 // lists are likely to stay so short as to not be an issue.
517 for ( size_t i = 0; i < attributes.size(); i++ )
518 {
519 if ( attribute == attributes[i]->name )
520 return attributes[i];
521 }
522 if ( required )
523 {
524 throw RequiredAttributeNotFoundException ( location,
525 attribute,
526 name );
527 }
528 return NULL;
529 }
530
531 // XMLParse()
532 // This function reads a "token" from the file loaded in XMLFile
533 // if it finds a tag that is non-singular, it parses sub-elements and/or
534 // inner text into the XMLElement that it is building to return.
535 // Return Value: an XMLElement allocated via the new operator that contains
536 // it's parsed data. Keep calling this function until it returns NULL
537 // (no more data)
538 XMLElement*
539 XMLParse ( XMLFile& f,
540 XMLIncludes* includes,
541 const Path& path,
542 bool* pend_tag = NULL )
543 {
544 string token;
545 if ( !f.get_token(token) )
546 return NULL;
547 bool end_tag, is_include = false;
548
549 while ( token[0] != '<'
550 || !strncmp ( token.c_str (), "<!--", 4 )
551 || !strncmp ( token.c_str (), "<?", 2 ) )
552 {
553 if ( token[0] != '<' )
554 throw XMLSyntaxErrorException ( f.Location (),
555 "expecting xml tag, not '%s'",
556 token.c_str () );
557 if ( !f.get_token(token) )
558 return NULL;
559 }
560
561 XMLElement* e = new XMLElement ( &f,
562 f.Location () );
563 bool bNeedEnd = e->Parse ( token, end_tag );
564
565 if ( e->name == "xi:include" && includes )
566 {
567 XMLAttribute* att;
568 att = e->GetAttribute ( "href", true );
569 assert ( att );
570 string includeFile ( path.Fixup ( att->value, true ) );
571 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
572 includes->push_back ( new XMLInclude ( e, path, topIncludeFile ) );
573 is_include = true;
574 }
575
576 if ( !bNeedEnd )
577 {
578 if ( pend_tag )
579 *pend_tag = end_tag;
580 else if ( end_tag )
581 {
582 delete e;
583 throw XMLSyntaxErrorException ( f.Location (),
584 "end tag '%s' not expected",
585 token.c_str() );
586 return NULL;
587 }
588 return e;
589 }
590 bool bThisMixingErrorReported = false;
591 while ( f.more_tokens () )
592 {
593 if ( f.next_is_text () )
594 {
595 if ( !f.get_token ( token ) || token.size () == 0 )
596 {
597 throw InvalidBuildFileException (
598 f.Location(),
599 "internal tool error - get_token() failed when more_tokens() returned true" );
600 break;
601 }
602 if ( e->subElements.size() && !bThisMixingErrorReported )
603 {
604 throw XMLSyntaxErrorException ( f.Location (),
605 "mixing of inner text with sub elements" );
606 bThisMixingErrorReported = true;
607 }
608 if ( strchr ( token.c_str (), '>' ) )
609 {
610 throw XMLSyntaxErrorException ( f.Location (),
611 "invalid symbol '>'" );
612 }
613 if ( e->value.size() > 0 )
614 {
615 throw XMLSyntaxErrorException ( f.Location (),
616 "multiple instances of inner text" );
617 e->value += " " + token;
618 }
619 else
620 e->value = token;
621 }
622 else
623 {
624 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
625 if ( !e2 )
626 {
627 throw InvalidBuildFileException (
628 e->location,
629 "end of file found looking for end tag" );
630 break;
631 }
632 if ( end_tag )
633 {
634 if ( e->name != e2->name )
635 {
636 delete e2;
637 throw XMLSyntaxErrorException ( f.Location (),
638 "end tag name mismatch" );
639 break;
640 }
641 delete e2;
642 break;
643 }
644 if ( e->value.size () > 0 && !bThisMixingErrorReported )
645 {
646 throw XMLSyntaxErrorException ( f.Location (),
647 "mixing of inner text with sub elements" );
648 bThisMixingErrorReported = true;
649 }
650 e->AddSubElement ( e2 );
651 }
652 }
653 return e;
654 }
655
656 void
657 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
658 {
659 for ( ;; )
660 {
661 XMLElement* e = XMLParse ( f, &includes, path );
662 if ( !e )
663 return;
664 head.AddSubElement ( e );
665 }
666 }
667
668 XMLElement*
669 XMLLoadInclude ( XMLInclude& include,
670 XMLIncludes& includes )
671 {
672 XMLAttribute* att;
673 att = include.e->GetAttribute("href", true);
674 assert(att);
675
676 string file ( include.path.Fixup(att->value, true) );
677 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
678 include.e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
679 XMLFile* fInc = new XMLFile();
680 if ( !fInc->open ( file ) )
681 {
682 include.fileExists = false;
683 // look for xi:fallback element
684 for ( size_t i = 0; i < include.e->subElements.size (); i++ )
685 {
686 XMLElement* e2 = include.e->subElements[i];
687 if ( e2->name == "xi:fallback" )
688 {
689 // now look for xi:include below...
690 for ( i = 0; i < e2->subElements.size (); i++ )
691 {
692 XMLElement* e3 = e2->subElements[i];
693 if ( e3->name == "xi:include" )
694 {
695 att = e3->GetAttribute ( "href", true );
696 assert ( att );
697 string includeFile ( include.path.Fixup ( att->value, true ) );
698 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
699 XMLInclude* fallbackInclude = new XMLInclude ( e3, include.path, topIncludeFile );
700 return XMLLoadInclude ( *fallbackInclude, includes );
701 }
702 }
703 throw InvalidBuildFileException (
704 e2->location,
705 "<xi:fallback> must have a <xi:include> sub-element" );
706 return NULL;
707 }
708 }
709 return NULL;
710 }
711 else
712 {
713 include.fileExists = true;
714 XMLElement* new_e = new XMLElement ( fInc,
715 include.e->location );
716 new_e->name = "xi:included";
717 Path path2 ( include.path, att->value );
718 XMLReadFile ( *fInc, *new_e, includes, path2 );
719 return new_e;
720 }
721 }
722
723 XMLElement*
724 XMLLoadFile ( const string& filename,
725 const Path& path,
726 XMLIncludes& includes )
727 {
728 XMLFile* f = new XMLFile();
729
730 if ( !f->open ( filename ) )
731 throw FileNotFoundException ( filename );
732
733 XMLElement* head = new XMLElement ( f,
734 "(virtual)" );
735
736 XMLReadFile ( *f, *head, includes, path );
737
738 for ( size_t i = 0; i < includes.size (); i++ )
739 {
740 XMLElement* e = includes[i]->e;
741 XMLElement* e2 = XMLLoadInclude ( *includes[i], includes );
742 if ( !e2 )
743 {
744 throw FileNotFoundException (
745 ssprintf ( "%s (referenced from %s)",
746 e->GetAttribute ( "top_href", true )->value.c_str (),
747 f->Location ().c_str () ) );
748 }
749 XMLElement* parent = e->parentElement;
750 XMLElement** parent_container = NULL;
751 if ( !parent )
752 {
753 delete e;
754 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
755 return NULL;
756 }
757 for ( size_t j = 0; j < parent->subElements.size (); j++ )
758 {
759 if ( parent->subElements[j] == e )
760 {
761 parent_container = &parent->subElements[j];
762 break;
763 }
764 }
765 if ( !parent_container )
766 {
767 delete e;
768 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
769 return NULL;
770 }
771 // replace inclusion tree with the imported tree
772 e2->parentElement = e->parentElement;
773 e2->name = e->name;
774 e2->attributes = e->attributes;
775 *parent_container = e2;
776 e->attributes.resize ( 0 );
777 delete e;
778 }
779 return head;
780 }