parse, but ignore, <? ?> tags - eliminated duplicate code ala FixSeparator() - fix...
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #include "pch.h"
4
5 #include <direct.h>
6 #include <io.h>
7 #include <assert.h>
8
9 #include "XML.h"
10 #include "exception.h"
11 #include "ssprintf.h"
12
13 using std::string;
14 using std::vector;
15
16 #ifdef WIN32
17 #define getcwd _getcwd
18 #endif//WIN32
19
// characters skipped as whitespace while scanning tokens and tags
static const char* WS = " \t\r\n";
// the same whitespace set plus '=', used to find where an attribute name ends
static const char* WSEQ = " =\t\r\n";

// current directory of the process; empty until InitWorkingDirectory() runs
std::string working_directory;
24
25 void
26 InitWorkingDirectory()
27 {
28 // store the current directory for path calculations
29 working_directory.resize ( _MAX_PATH );
30 working_directory[0] = 0;
31 getcwd ( &working_directory[0], working_directory.size() );
32 working_directory.resize ( strlen ( working_directory.c_str() ) );
33 }
34
// filelen()
// Returns the length in bytes of the file behind the open stream f.
// The UNIX and fallback branches return 0 when the length cannot be
// determined; the WIN32 branch returns whatever _filelengthi64() reports.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#elif defined(UNIX)
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#else
	// neither platform macro is defined: measure the file with plain
	// stdio instead of falling off the end of the function
	const long saved = ftell ( f );
	if ( saved < 0 || fseek ( f, 0, SEEK_END ) != 0 )
		return 0;
	const long size = ftell ( f );
	// put the read/write position back where the caller left it
	fseek ( f, saved, SEEK_SET );
	if ( size < 0 )
		return 0;
	return static_cast<unsigned long> ( size );
#endif
}
51
52 Path::Path()
53 {
54 if ( !working_directory.size() )
55 InitWorkingDirectory();
56 string s ( working_directory );
57 const char* p = strtok ( &s[0], "/\\" );
58 while ( p )
59 {
60 if ( *p )
61 path.push_back ( p );
62 p = strtok ( NULL, "/\\" );
63 }
64 }
65
66 Path::Path ( const Path& cwd, const string& file )
67 {
68 string s ( cwd.Fixup ( file, false ) );
69 const char* p = strtok ( &s[0], "/\\" );
70 while ( p )
71 {
72 if ( *p )
73 path.push_back ( p );
74 p = strtok ( NULL, "/\\" );
75 }
76 }
77
78 string
79 Path::Fixup ( const string& file, bool include_filename ) const
80 {
81 if ( strchr ( "/\\", file[0] )
82 #ifdef WIN32
83 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
84 || file[1] == ':'
85 #endif//WIN32
86 )
87 {
88 return file;
89 }
90 vector<string> pathtmp ( path );
91 string tmp ( file );
92 const char* prev = strtok ( &tmp[0], "/\\" );
93 const char* p = strtok ( NULL, "/\\" );
94 while ( p )
95 {
96 if ( !strcmp ( prev, "." ) )
97 ; // do nothing
98 else if ( !strcmp ( prev, ".." ) )
99 {
100 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
101 #ifdef WIN32
102 if ( pathtmp.size() > 1 )
103 #else
104 if ( pathtmp.size() )
105 #endif
106 pathtmp.resize ( pathtmp.size() - 1 );
107 }
108 else
109 pathtmp.push_back ( prev );
110 prev = p;
111 p = strtok ( NULL, "/\\" );
112 }
113 if ( include_filename )
114 pathtmp.push_back ( prev );
115
116 // reuse tmp variable to return recombined path
117 tmp.resize(0);
118 for ( size_t i = 0; i < pathtmp.size(); i++ )
119 {
120 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
121 #ifdef WIN32
122 if ( i ) tmp += "/";
123 #else
124 tmp += "/";
125 #endif
126 tmp += pathtmp[i];
127 }
128 return tmp;
129 }
130
131 /*static*/ string
132 Path::RelativeFromWorkingDirectory ( const string& path )
133 {
134 vector<string> vwork, vpath, vout;
135 Path::Split ( vwork, working_directory, true );
136 Path::Split ( vpath, path, true );
137 #ifdef WIN32
138 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
139 // not possible to do relative across different drive letters
140 if ( vwork[0] != vpath[0] )
141 return path;
142 #endif
143 size_t i = 0;
144 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
145 ++i;
146 if ( i < vwork.size() )
147 {
148 // path goes above our working directory, we will need some ..'s
149 for ( size_t j = 0; j < i; j++ )
150 vout.push_back ( ".." );
151 }
152 while ( i < vpath.size() )
153 vout.push_back ( vpath[i++] );
154
155 // now merge vout into a string again
156 string out;
157 for ( i = 0; i < vout.size(); i++ )
158 {
159 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
160 #ifdef WIN32
161 if ( i ) out += "/";
162 #else
163 out += "/";
164 #endif
165 out += vout[i];
166 }
167 return out;
168 }
169
170 /*static*/ void
171 Path::Split ( vector<string>& out,
172 const string& path,
173 bool include_last )
174 {
175 string s ( path );
176 const char* prev = strtok ( &s[0], "/\\" );
177 const char* p = strtok ( NULL, "/\\" );
178 out.resize ( 0 );
179 while ( p )
180 {
181 out.push_back ( prev );
182 prev = p;
183 p = strtok ( NULL, "/\\" );
184 }
185 if ( include_last )
186 out.push_back ( prev );
187 }
188
189 XMLFile::XMLFile()
190 {
191 }
192
193 void
194 XMLFile::close()
195 {
196 _buf.resize(0);
197 _p = _end = NULL;
198 }
199
200 bool
201 XMLFile::open(const string& filename_)
202 {
203 close();
204 FILE* f = fopen ( filename_.c_str(), "rb" );
205 if ( !f )
206 return false;
207 unsigned long len = (unsigned long)filelen(f);
208 _buf.resize ( len );
209 fread ( &_buf[0], 1, len, f );
210 fclose ( f );
211 _p = _buf.c_str();
212 _end = _p + len;
213 _filename = filename_;
214 next_token();
215 return true;
216 }
217
218 // next_token() moves the pointer to next token, which may be
219 // an xml element or a text element, basically it's a glorified
220 // skipspace, normally the user of this class won't need to call
221 // this function
222 void
223 XMLFile::next_token()
224 {
225 _p += strspn ( _p, WS );
226 }
227
228 bool
229 XMLFile::next_is_text()
230 {
231 return *_p != '<';
232 }
233
234 bool
235 XMLFile::more_tokens()
236 {
237 return _p != _end;
238 }
239
240 // get_token() is used to return a token, and move the pointer
241 // past the token
242 bool
243 XMLFile::get_token(string& token)
244 {
245 const char* tokend;
246 if ( !strncmp ( _p, "<!--", 4 ) )
247 {
248 tokend = strstr ( _p, "-->" );
249 if ( !tokend )
250 tokend = _end;
251 else
252 tokend += 3;
253 }
254 else if ( !strncmp ( _p, "<?", 2 ) )
255 {
256 tokend = strstr ( _p, "?>" );
257 if ( !tokend )
258 tokend = _end;
259 else
260 tokend += 2;
261 }
262 else if ( *_p == '<' )
263 {
264 tokend = strchr ( _p, '>' );
265 if ( !tokend )
266 tokend = _end;
267 else
268 ++tokend;
269 }
270 else
271 {
272 tokend = strchr ( _p, '<' );
273 if ( !tokend )
274 tokend = _end;
275 while ( tokend > _p && isspace(tokend[-1]) )
276 --tokend;
277 }
278 if ( tokend == _p )
279 return false;
280 token = string ( _p, tokend-_p );
281 _p = tokend;
282 next_token();
283 return true;
284 }
285
286 string
287 XMLFile::Location() const
288 {
289 int line = 1;
290 const char* p = strchr ( _buf.c_str(), '\n' );
291 while ( p && p < _p )
292 {
293 ++line;
294 p = strchr ( p+1, '\n' );
295 }
296 return ssprintf ( "%s(%i)",_filename.c_str(), line );
297 }
298
299 XMLAttribute::XMLAttribute()
300 {
301 }
302
303 XMLAttribute::XMLAttribute(const string& name_,
304 const string& value_)
305 : name(name_), value(value_)
306 {
307 }
308
309 XMLElement::XMLElement()
310 : parentElement(NULL)
311 {
312 }
313
314 XMLElement::~XMLElement()
315 {
316 size_t i;
317 for ( i = 0; i < attributes.size(); i++ )
318 delete attributes[i];
319 for ( i = 0; i < subElements.size(); i++ )
320 delete subElements[i];
321 }
322
323 void
324 XMLElement::AddSubElement ( XMLElement* e )
325 {
326 subElements.push_back ( e );
327 e->parentElement = this;
328 }
329
330 // Parse()
331 // This function takes a single xml tag ( i.e. beginning with '<' and
332 // ending with '>', and parses out it's tag name and constituent
333 // attributes.
334 // Return Value: returns true if you need to look for a </tag> for
335 // the one it just parsed...
336 bool
337 XMLElement::Parse(const string& token,
338 bool& end_tag)
339 {
340 const char* p = token.c_str();
341 assert ( *p == '<' );
342 ++p;
343 p += strspn ( p, WS );
344
345 // check if this is a comment
346 if ( !strncmp ( p, "!--", 3 ) )
347 {
348 name = "!--";
349 end_tag = false;
350 return false; // never look for end tag to a comment
351 }
352
353 end_tag = ( *p == '/' );
354 if ( end_tag )
355 {
356 ++p;
357 p += strspn ( p, WS );
358 }
359 const char* end = strpbrk ( p, WS );
360 if ( !end )
361 {
362 end = strpbrk ( p, "/>" );
363 assert ( end );
364 }
365 name = string ( p, end-p );
366 p = end;
367 p += strspn ( p, WS );
368 while ( *p != '>' && *p != '/' )
369 {
370 end = strpbrk ( p, WSEQ );
371 if ( !end )
372 {
373 end = strpbrk ( p, "/>" );
374 assert ( end );
375 }
376 string attribute ( p, end-p ), value;
377 p = end;
378 p += strspn ( p, WS );
379 if ( *p == '=' )
380 {
381 ++p;
382 p += strspn ( p, WS );
383 char quote = 0;
384 if ( strchr ( "\"'", *p ) )
385 {
386 quote = *p++;
387 end = strchr ( p, quote );
388 }
389 else
390 {
391 end = strpbrk ( p, WS );
392 }
393 if ( !end )
394 {
395 end = strchr ( p, '>' );
396 assert(end);
397 if ( end[-1] == '/' )
398 end--;
399 }
400 value = string ( p, end-p );
401 p = end;
402 if ( quote && *p == quote )
403 p++;
404 p += strspn ( p, WS );
405 }
406 attributes.push_back ( new XMLAttribute ( attribute, value ) );
407 }
408 return !( *p == '/' ) && !end_tag;
409 }
410
411 XMLAttribute*
412 XMLElement::GetAttribute ( const string& attribute,
413 bool required )
414 {
415 // this would be faster with a tree-based container, but our attribute
416 // lists are likely to stay so short as to not be an issue.
417 for ( size_t i = 0; i < attributes.size(); i++ )
418 {
419 if ( attribute == attributes[i]->name )
420 return attributes[i];
421 }
422 if ( required )
423 {
424 throw RequiredAttributeNotFoundException ( attribute,
425 name );
426 }
427 return NULL;
428 }
429
430 const XMLAttribute*
431 XMLElement::GetAttribute ( const string& attribute,
432 bool required ) const
433 {
434 // this would be faster with a tree-based container, but our attribute
435 // lists are likely to stay so short as to not be an issue.
436 for ( size_t i = 0; i < attributes.size(); i++ )
437 {
438 if ( attribute == attributes[i]->name )
439 return attributes[i];
440 }
441 if ( required )
442 {
443 throw RequiredAttributeNotFoundException ( attribute,
444 name );
445 }
446 return NULL;
447 }
448
449 // XMLParse()
450 // This function reads a "token" from the file loaded in XMLFile
451 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
452 // if it finds a tag that is non-singular, it parses sub-elements and/or
453 // inner text into the XMLElement that it is building to return.
454 // Return Value: an XMLElement allocated via the new operator that contains
455 // it's parsed data. Keep calling this function until it returns NULL
456 // (no more data)
457 XMLElement*
458 XMLParse(XMLFile& f,
459 const Path& path,
460 bool* pend_tag /*= NULL*/)
461 {
462 string token;
463 if ( !f.get_token(token) )
464 return NULL;
465 bool end_tag;
466
467 while ( token[0] != '<'
468 || !strncmp ( token.c_str(), "<!--", 4 )
469 || !strncmp ( token.c_str(), "<?", 2 ) )
470 {
471 if ( token[0] != '<' )
472 throw XMLSyntaxErrorException ( f.Location(),
473 "expecting xml tag, not '%s'",
474 token.c_str() );
475 if ( !f.get_token(token) )
476 return NULL;
477 }
478
479 XMLElement* e = new XMLElement;
480 bool bNeedEnd = e->Parse ( token, end_tag );
481
482 if ( e->name == "xi:include" )
483 {
484 XMLAttribute* att;
485 att = e->GetAttribute("href",true);
486 assert(att);
487
488 string file ( path.Fixup(att->value,true) );
489 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
490 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
491 XMLFile fInc;
492 if ( !fInc.open ( file ) )
493 throw FileNotFoundException (
494 ssprintf("%s (referenced from %s)",
495 file.c_str(),
496 f.Location().c_str() ) );
497 else
498 {
499 Path path2 ( path, att->value );
500 for ( ;; )
501 {
502 XMLElement* e2 = XMLParse ( fInc, path2 );
503 if ( !e2 )
504 break;
505 e->AddSubElement ( e2 );
506 }
507 }
508 }
509
510 if ( !bNeedEnd )
511 {
512 if ( pend_tag )
513 *pend_tag = end_tag;
514 else if ( end_tag )
515 {
516 delete e;
517 throw XMLSyntaxErrorException ( f.Location(),
518 "end tag '%s' not expected",
519 token.c_str() );
520 return NULL;
521 }
522 return e;
523 }
524 bool bThisMixingErrorReported = false;
525 while ( f.more_tokens() )
526 {
527 if ( f.next_is_text() )
528 {
529 if ( !f.get_token ( token ) || !token.size() )
530 {
531 throw Exception ( "internal tool error - get_token() failed when more_tokens() returned true" );
532 break;
533 }
534 if ( e->subElements.size() && !bThisMixingErrorReported )
535 {
536 throw XMLSyntaxErrorException ( f.Location(),
537 "mixing of inner text with sub elements" );
538 bThisMixingErrorReported = true;
539 }
540 if ( strchr ( token.c_str(), '>' ) )
541 {
542 throw XMLSyntaxErrorException ( f.Location(),
543 "invalid symbol '>'" );
544 }
545 if ( e->value.size() )
546 {
547 throw XMLSyntaxErrorException ( f.Location(),
548 "multiple instances of inner text" );
549 e->value += " " + token;
550 }
551 else
552 e->value = token;
553 }
554 else
555 {
556 XMLElement* e2 = XMLParse ( f, path, &end_tag );
557 if ( end_tag )
558 {
559 if ( e->name != e2->name )
560 throw XMLSyntaxErrorException ( f.Location(),
561 "end tag name mismatch" );
562 delete e2;
563 break;
564 }
565 if ( e->value.size() && !bThisMixingErrorReported )
566 {
567 throw XMLSyntaxErrorException ( f.Location(),
568 "mixing of inner text with sub elements" );
569 bThisMixingErrorReported = true;
570 }
571 e->AddSubElement ( e2 );
572 }
573 }
574 return e;
575 }