don't crash if path == base_directory.
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 /*
2 * Copyright (C) 2005 Casper S. Hornstrup
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 #include "pch.h"
19
20 #ifdef _MSC_VER
21 #define MAX_PATH _MAX_PATH
22 #endif
23
24 #ifdef WIN32
25 # include <direct.h>
26 # include <io.h>
27 #else
28 # include <sys/stat.h>
29 # define MAX_PATH PATH_MAX
30 #endif
31 #include <assert.h>
32
33 #include "XML.h"
34 #include "exception.h"
35 #include "ssprintf.h"
36
37 using std::string;
38 using std::vector;
39
40 #ifdef WIN32
41 #define getcwd _getcwd
42 #endif//WIN32
43
// characters treated as whitespace when skipping between and inside tokens
static const char* WS = " \t\r\n";
// whitespace plus '=': the characters that end an attribute name in a tag
static const char* WSEQ = " =\t\r\n";

// current directory of the process; filled in by InitWorkingDirectory()
string working_directory;
48
49 XMLIncludes::~XMLIncludes()
50 {
51 for ( size_t i = 0; i < this->size(); i++ )
52 delete (*this)[i];
53 }
54
55 void
56 InitWorkingDirectory()
57 {
58 // store the current directory for path calculations
59 working_directory.resize ( _MAX_PATH );
60 working_directory[0] = 0;
61 getcwd ( &working_directory[0], working_directory.size() );
62 working_directory.resize ( strlen ( working_directory.c_str() ) );
63 }
64
// Returns the length in bytes of the file behind the stream f.
// On non-WIN32 builds a failing fstat64() is reported as a length of 0.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	const int fd = _fileno ( f );
	return _filelengthi64 ( fd );
#else
	struct stat64 info;
	const int rc = fstat64 ( fileno ( f ), &info );
	if ( rc == 0 )
		return info.st_size;
	return 0;
#endif
}
81
82 Path::Path()
83 {
84 if ( !working_directory.size() )
85 InitWorkingDirectory();
86 string s ( working_directory );
87 const char* p = strtok ( &s[0], "/\\" );
88 while ( p )
89 {
90 if ( *p )
91 path.push_back ( p );
92 p = strtok ( NULL, "/\\" );
93 }
94 }
95
96 Path::Path ( const Path& cwd, const string& file )
97 {
98 string s ( cwd.Fixup ( file, false ) );
99 const char* p = strtok ( &s[0], "/\\" );
100 while ( p )
101 {
102 if ( *p )
103 path.push_back ( p );
104 p = strtok ( NULL, "/\\" );
105 }
106 }
107
108 string
109 Path::Fixup ( const string& file, bool include_filename ) const
110 {
111 if ( strchr ( "/\\", file[0] )
112 #ifdef WIN32
113 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
114 || file[1] == ':'
115 #endif//WIN32
116 )
117 {
118 return file;
119 }
120 vector<string> pathtmp ( path );
121 string tmp ( file );
122 const char* prev = strtok ( &tmp[0], "/\\" );
123 const char* p = strtok ( NULL, "/\\" );
124 while ( p )
125 {
126 if ( !strcmp ( prev, "." ) )
127 ; // do nothing
128 else if ( !strcmp ( prev, ".." ) )
129 {
130 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
131 #ifdef WIN32
132 if ( pathtmp.size() > 1 )
133 #else
134 if ( pathtmp.size() )
135 #endif
136 pathtmp.resize ( pathtmp.size() - 1 );
137 }
138 else
139 pathtmp.push_back ( prev );
140 prev = p;
141 p = strtok ( NULL, "/\\" );
142 }
143 if ( include_filename )
144 pathtmp.push_back ( prev );
145
146 // reuse tmp variable to return recombined path
147 tmp.resize(0);
148 for ( size_t i = 0; i < pathtmp.size(); i++ )
149 {
150 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
151 #ifdef WIN32
152 if ( i ) tmp += "/";
153 #else
154 tmp += "/";
155 #endif
156 tmp += pathtmp[i];
157 }
158 return tmp;
159 }
160
161 string
162 Path::RelativeFromWorkingDirectory ()
163 {
164 string out = "";
165 for ( size_t i = 0; i < path.size(); i++ )
166 {
167 out += "/" + path[i];
168 }
169 return RelativeFromWorkingDirectory ( out );
170 }
171
172 string
173 Path::RelativeFromWorkingDirectory ( const string& path )
174 {
175 return Path::RelativeFromDirectory ( path, working_directory );
176 }
177
178 string
179 Path::RelativeFromDirectory (
180 const string& path,
181 const string& base_directory )
182 {
183 vector<string> vbase, vpath, vout;
184 Path::Split ( vbase, base_directory, true );
185 Path::Split ( vpath, path, true );
186 #ifdef WIN32
187 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
188 // not possible to do relative across different drive letters
189 if ( vbase[0] != vpath[0] )
190 return path;
191 #endif
192 size_t i = 0;
193 while ( i < vbase.size() && i < vpath.size() && vbase[i] == vpath[i] )
194 ++i;
195 if ( vbase.size() == vpath.size() && i == vpath.size() )
196 return ".";
197 if ( i < vbase.size() )
198 {
199 // path goes above our base directory, we will need some ..'s
200 for ( size_t j = 0; j < i; j++ )
201 vout.push_back ( ".." );
202 }
203 while ( i < vpath.size() )
204 vout.push_back ( vpath[i++] );
205
206 // now merge vout into a string again
207 string out = vout[0];
208 for ( i = 1; i < vout.size(); i++ )
209 {
210 out += "/" + vout[i];
211 }
212 return out;
213 }
214
215 void
216 Path::Split ( vector<string>& out,
217 const string& path,
218 bool include_last )
219 {
220 string s ( path );
221 const char* prev = strtok ( &s[0], "/\\" );
222 const char* p = strtok ( NULL, "/\\" );
223 out.resize ( 0 );
224 while ( p )
225 {
226 out.push_back ( prev );
227 prev = p;
228 p = strtok ( NULL, "/\\" );
229 }
230 if ( include_last )
231 out.push_back ( prev );
232 }
233
234 XMLFile::XMLFile()
235 {
236 }
237
238 void
239 XMLFile::close()
240 {
241 _buf.resize(0);
242 _p = _end = NULL;
243 }
244
245 bool
246 XMLFile::open(const string& filename_)
247 {
248 close();
249 FILE* f = fopen ( filename_.c_str(), "rb" );
250 if ( !f )
251 return false;
252 unsigned long len = (unsigned long)filelen(f);
253 _buf.resize ( len );
254 fread ( &_buf[0], 1, len, f );
255 fclose ( f );
256 _p = _buf.c_str();
257 _end = _p + len;
258 _filename = filename_;
259 next_token();
260 return true;
261 }
262
263 // next_token() moves the pointer to next token, which may be
264 // an xml element or a text element, basically it's a glorified
265 // skipspace, normally the user of this class won't need to call
266 // this function
267 void
268 XMLFile::next_token()
269 {
270 _p += strspn ( _p, WS );
271 }
272
273 bool
274 XMLFile::next_is_text()
275 {
276 return *_p != '<';
277 }
278
279 bool
280 XMLFile::more_tokens()
281 {
282 return _p != _end;
283 }
284
285 // get_token() is used to return a token, and move the pointer
286 // past the token
287 bool
288 XMLFile::get_token(string& token)
289 {
290 const char* tokend;
291 if ( !strncmp ( _p, "<!--", 4 ) )
292 {
293 tokend = strstr ( _p, "-->" );
294 if ( !tokend )
295 tokend = _end;
296 else
297 tokend += 3;
298 }
299 else if ( !strncmp ( _p, "<?", 2 ) )
300 {
301 tokend = strstr ( _p, "?>" );
302 if ( !tokend )
303 tokend = _end;
304 else
305 tokend += 2;
306 }
307 else if ( *_p == '<' )
308 {
309 tokend = strchr ( _p, '>' );
310 if ( !tokend )
311 tokend = _end;
312 else
313 ++tokend;
314 }
315 else
316 {
317 tokend = strchr ( _p, '<' );
318 if ( !tokend )
319 tokend = _end;
320 while ( tokend > _p && isspace(tokend[-1]) )
321 --tokend;
322 }
323 if ( tokend == _p )
324 return false;
325 token = string ( _p, tokend-_p );
326 _p = tokend;
327 next_token();
328 return true;
329 }
330
331 string
332 XMLFile::Location() const
333 {
334 int line = 1;
335 const char* p = strchr ( _buf.c_str(), '\n' );
336 while ( p && p < _p )
337 {
338 ++line;
339 p = strchr ( p+1, '\n' );
340 }
341 return ssprintf ( "%s(%i)",_filename.c_str(), line );
342 }
343
// Creates an attribute whose name and value are both default-constructed.
XMLAttribute::XMLAttribute()
{
}
347
// Creates an attribute holding copies of the given name and value.
XMLAttribute::XMLAttribute(const string& name_,
const string& value_)
: name(name_), value(value_)
{
}
353
// Copy constructor: duplicates the name and value of 'src'.
XMLAttribute::XMLAttribute ( const XMLAttribute& src )
: name(src.name), value(src.value)
{

}
359
360 XMLAttribute& XMLAttribute::operator = ( const XMLAttribute& src )
361 {
362 name = src.name;
363 value = src.value;
364 return *this;
365 }
366
// Creates an element that records the file it belongs to and the
// location string it was found at; it starts out without a parent.
XMLElement::XMLElement ( XMLFile* xmlFile,
const string& location )
: xmlFile ( xmlFile ),
location ( location ),
parentElement ( NULL )
{
}
374
375 XMLElement::~XMLElement()
376 {
377 size_t i;
378 for ( i = 0; i < attributes.size(); i++ )
379 delete attributes[i];
380 for ( i = 0; i < subElements.size(); i++ )
381 delete subElements[i];
382 }
383
384 void
385 XMLElement::AddSubElement ( XMLElement* e )
386 {
387 subElements.push_back ( e );
388 e->parentElement = this;
389 }
390
391 // Parse()
392 // This function takes a single xml tag ( i.e. beginning with '<' and
393 // ending with '>', and parses out it's tag name and constituent
394 // attributes.
395 // Return Value: returns true if you need to look for a </tag> for
396 // the one it just parsed...
397 bool
398 XMLElement::Parse(const string& token,
399 bool& end_tag)
400 {
401 const char* p = token.c_str();
402 assert ( *p == '<' );
403 ++p;
404 p += strspn ( p, WS );
405
406 // check if this is a comment
407 if ( !strncmp ( p, "!--", 3 ) )
408 {
409 name = "!--";
410 end_tag = false;
411 return false; // never look for end tag to a comment
412 }
413
414 end_tag = ( *p == '/' );
415 if ( end_tag )
416 {
417 ++p;
418 p += strspn ( p, WS );
419 }
420 const char* end = strpbrk ( p, WS );
421 if ( !end )
422 {
423 end = strpbrk ( p, "/>" );
424 assert ( end );
425 }
426 name = string ( p, end-p );
427 p = end;
428 p += strspn ( p, WS );
429 while ( *p != '>' && *p != '/' )
430 {
431 end = strpbrk ( p, WSEQ );
432 if ( !end )
433 {
434 end = strpbrk ( p, "/>" );
435 assert ( end );
436 }
437 string attribute ( p, end-p ), value;
438 p = end;
439 p += strspn ( p, WS );
440 if ( *p == '=' )
441 {
442 ++p;
443 p += strspn ( p, WS );
444 char quote = 0;
445 if ( strchr ( "\"'", *p ) )
446 {
447 quote = *p++;
448 end = strchr ( p, quote );
449 }
450 else
451 {
452 end = strpbrk ( p, WS );
453 }
454 if ( !end )
455 {
456 end = strchr ( p, '>' );
457 assert(end);
458 if ( end[-1] == '/' )
459 end--;
460 }
461 value = string ( p, end-p );
462 p = end;
463 if ( quote && *p == quote )
464 p++;
465 p += strspn ( p, WS );
466 }
467 else if ( name[0] != '!' )
468 {
469 throw XMLSyntaxErrorException ( location,
470 "attributes must have values" );
471 }
472 attributes.push_back ( new XMLAttribute ( attribute, value ) );
473 }
474 return !( *p == '/' ) && !end_tag;
475 }
476
477 XMLAttribute*
478 XMLElement::GetAttribute ( const string& attribute,
479 bool required )
480 {
481 // this would be faster with a tree-based container, but our attribute
482 // lists are likely to stay so short as to not be an issue.
483 for ( size_t i = 0; i < attributes.size(); i++ )
484 {
485 if ( attribute == attributes[i]->name )
486 return attributes[i];
487 }
488 if ( required )
489 {
490 throw RequiredAttributeNotFoundException ( location,
491 attribute,
492 name );
493 }
494 return NULL;
495 }
496
497 const XMLAttribute*
498 XMLElement::GetAttribute ( const string& attribute,
499 bool required ) const
500 {
501 // this would be faster with a tree-based container, but our attribute
502 // lists are likely to stay so short as to not be an issue.
503 for ( size_t i = 0; i < attributes.size(); i++ )
504 {
505 if ( attribute == attributes[i]->name )
506 return attributes[i];
507 }
508 if ( required )
509 {
510 throw RequiredAttributeNotFoundException ( location,
511 attribute,
512 name );
513 }
514 return NULL;
515 }
516
517 // XMLParse()
518 // This function reads a "token" from the file loaded in XMLFile
519 // if it finds a tag that is non-singular, it parses sub-elements and/or
520 // inner text into the XMLElement that it is building to return.
521 // Return Value: an XMLElement allocated via the new operator that contains
522 // it's parsed data. Keep calling this function until it returns NULL
523 // (no more data)
524 XMLElement*
525 XMLParse ( XMLFile& f,
526 XMLIncludes* includes,
527 const Path& path,
528 bool* pend_tag = NULL )
529 {
530 string token;
531 if ( !f.get_token(token) )
532 return NULL;
533 bool end_tag, is_include = false;
534
535 while ( token[0] != '<'
536 || !strncmp ( token.c_str (), "<!--", 4 )
537 || !strncmp ( token.c_str (), "<?", 2 ) )
538 {
539 if ( token[0] != '<' )
540 throw XMLSyntaxErrorException ( f.Location (),
541 "expecting xml tag, not '%s'",
542 token.c_str () );
543 if ( !f.get_token(token) )
544 return NULL;
545 }
546
547 XMLElement* e = new XMLElement ( &f,
548 f.Location () );
549 bool bNeedEnd = e->Parse ( token, end_tag );
550
551 if ( e->name == "xi:include" && includes )
552 {
553 XMLAttribute* att;
554 att = e->GetAttribute ( "href", true );
555 assert ( att );
556 string includeFile ( path.Fixup ( att->value, true ) );
557 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
558 includes->push_back ( new XMLInclude ( e, path, topIncludeFile ) );
559 is_include = true;
560 }
561
562 if ( !bNeedEnd )
563 {
564 if ( pend_tag )
565 *pend_tag = end_tag;
566 else if ( end_tag )
567 {
568 delete e;
569 throw XMLSyntaxErrorException ( f.Location (),
570 "end tag '%s' not expected",
571 token.c_str() );
572 return NULL;
573 }
574 return e;
575 }
576 bool bThisMixingErrorReported = false;
577 while ( f.more_tokens () )
578 {
579 if ( f.next_is_text () )
580 {
581 if ( !f.get_token ( token ) || token.size () == 0 )
582 {
583 throw InvalidBuildFileException (
584 f.Location(),
585 "internal tool error - get_token() failed when more_tokens() returned true" );
586 break;
587 }
588 if ( e->subElements.size() && !bThisMixingErrorReported )
589 {
590 throw XMLSyntaxErrorException ( f.Location (),
591 "mixing of inner text with sub elements" );
592 bThisMixingErrorReported = true;
593 }
594 if ( strchr ( token.c_str (), '>' ) )
595 {
596 throw XMLSyntaxErrorException ( f.Location (),
597 "invalid symbol '>'" );
598 }
599 if ( e->value.size() > 0 )
600 {
601 throw XMLSyntaxErrorException ( f.Location (),
602 "multiple instances of inner text" );
603 e->value += " " + token;
604 }
605 else
606 e->value = token;
607 }
608 else
609 {
610 XMLElement* e2 = XMLParse ( f, is_include ? NULL : includes, path, &end_tag );
611 if ( !e2 )
612 {
613 throw InvalidBuildFileException (
614 e->location,
615 "end of file found looking for end tag" );
616 break;
617 }
618 if ( end_tag )
619 {
620 if ( e->name != e2->name )
621 {
622 delete e2;
623 throw XMLSyntaxErrorException ( f.Location (),
624 "end tag name mismatch" );
625 break;
626 }
627 delete e2;
628 break;
629 }
630 if ( e->value.size () > 0 && !bThisMixingErrorReported )
631 {
632 throw XMLSyntaxErrorException ( f.Location (),
633 "mixing of inner text with sub elements" );
634 bThisMixingErrorReported = true;
635 }
636 e->AddSubElement ( e2 );
637 }
638 }
639 return e;
640 }
641
642 void
643 XMLReadFile ( XMLFile& f, XMLElement& head, XMLIncludes& includes, const Path& path )
644 {
645 for ( ;; )
646 {
647 XMLElement* e = XMLParse ( f, &includes, path );
648 if ( !e )
649 return;
650 head.AddSubElement ( e );
651 }
652 }
653
654 XMLElement*
655 XMLLoadInclude ( XMLInclude& include,
656 XMLIncludes& includes )
657 {
658 XMLAttribute* att;
659 att = include.e->GetAttribute("href", true);
660 assert(att);
661
662 string file ( include.path.Fixup(att->value, true) );
663 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
664 include.e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
665 XMLFile* fInc = new XMLFile();
666 if ( !fInc->open ( file ) )
667 {
668 include.fileExists = false;
669 // look for xi:fallback element
670 for ( size_t i = 0; i < include.e->subElements.size (); i++ )
671 {
672 XMLElement* e2 = include.e->subElements[i];
673 if ( e2->name == "xi:fallback" )
674 {
675 // now look for xi:include below...
676 for ( i = 0; i < e2->subElements.size (); i++ )
677 {
678 XMLElement* e3 = e2->subElements[i];
679 if ( e3->name == "xi:include" )
680 {
681 att = e3->GetAttribute ( "href", true );
682 assert ( att );
683 string includeFile ( include.path.Fixup ( att->value, true ) );
684 string topIncludeFile ( Path::RelativeFromWorkingDirectory ( includeFile ) );
685 XMLInclude* fallbackInclude = new XMLInclude ( e3, include.path, topIncludeFile );
686 return XMLLoadInclude ( *fallbackInclude, includes );
687 }
688 }
689 throw InvalidBuildFileException (
690 e2->location,
691 "<xi:fallback> must have a <xi:include> sub-element" );
692 return NULL;
693 }
694 }
695 return NULL;
696 }
697 else
698 {
699 include.fileExists = true;
700 XMLElement* new_e = new XMLElement ( fInc,
701 include.e->location );
702 new_e->name = "xi:included";
703 Path path2 ( include.path, att->value );
704 XMLReadFile ( *fInc, *new_e, includes, path2 );
705 return new_e;
706 }
707 }
708
709 XMLElement*
710 XMLLoadFile ( const string& filename,
711 const Path& path,
712 XMLIncludes& includes )
713 {
714 XMLFile* f = new XMLFile();
715
716 if ( !f->open ( filename ) )
717 throw FileNotFoundException ( filename );
718
719 XMLElement* head = new XMLElement ( f,
720 "(virtual)" );
721
722 XMLReadFile ( *f, *head, includes, path );
723
724 for ( size_t i = 0; i < includes.size (); i++ )
725 {
726 XMLElement* e = includes[i]->e;
727 XMLElement* e2 = XMLLoadInclude ( *includes[i], includes );
728 if ( !e2 )
729 {
730 throw FileNotFoundException (
731 ssprintf ( "%s (referenced from %s)",
732 e->GetAttribute ( "top_href", true )->value.c_str (),
733 f->Location ().c_str () ) );
734 }
735 XMLElement* parent = e->parentElement;
736 XMLElement** parent_container = NULL;
737 if ( !parent )
738 {
739 delete e;
740 throw Exception ( "internal tool error: xi:include doesn't have a parent" );
741 return NULL;
742 }
743 for ( size_t j = 0; j < parent->subElements.size (); j++ )
744 {
745 if ( parent->subElements[j] == e )
746 {
747 parent_container = &parent->subElements[j];
748 break;
749 }
750 }
751 if ( !parent_container )
752 {
753 delete e;
754 throw Exception ( "internal tool error: couldn't find xi:include in parent's sub-elements" );
755 return NULL;
756 }
757 // replace inclusion tree with the imported tree
758 e2->parentElement = e->parentElement;
759 e2->name = e->name;
760 e2->attributes = e->attributes;
761 *parent_container = e2;
762 e->attributes.resize ( 0 );
763 delete e;
764 }
765 return head;
766 }