26b25c38998e760fee7b3d22909a09e5c10e4321
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #ifdef _MSC_VER
4 #pragma warning ( disable : 4786 ) // identifier was truncated to '255' characters in the debug information
5 #endif//_MSC_VER
6
7 #include <direct.h>
8 #include <io.h>
9 #include <assert.h>
10
11 #include "XML.h"
12
13 using std::string;
14 using std::vector;
15
16 #ifdef WIN32
17 #define getcwd _getcwd
18 #endif//WIN32
19
// Characters skipped as whitespace between tokens, tag names and attributes.
static const char WS[] = " \t\r\n";
// The whitespace set plus '=': any of these ends an attribute name.
static const char WSEQ[] = " =\t\r\n";
22
23 string working_directory;
24
25 void
26 InitWorkingDirectory()
27 {
28 // store the current directory for path calculations
29 working_directory.resize ( _MAX_PATH );
30 working_directory[0] = 0;
31 getcwd ( &working_directory[0], working_directory.size() );
32 working_directory.resize ( strlen ( working_directory.c_str() ) );
33 }
34
// filelen()
// Returns the size in bytes of the file behind the open stream f, or 0 if
// the size cannot be determined (including when f is NULL). The stream's
// read/write position is left where it was.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
	if ( !f )
		return 0;
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#elif defined(UNIX)
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#else
	// neither platform macro is defined: measure portably by seeking to the
	// end of the stream and then restoring the original position
	const long pos = ftell ( f );
	if ( pos < 0 || fseek ( f, 0, SEEK_END ) != 0 )
		return 0;
	const long len = ftell ( f );
	fseek ( f, pos, SEEK_SET );
	if ( len < 0 )
		return 0;
	return (unsigned long)len;
#endif
}
51
52 Path::Path()
53 {
54 string s ( working_directory );
55 const char* p = strtok ( &s[0], "/\\" );
56 while ( p )
57 {
58 if ( *p )
59 path.push_back ( p );
60 p = strtok ( NULL, "/\\" );
61 }
62 }
63
64 Path::Path ( const Path& cwd, const string& file )
65 {
66 string s ( cwd.Fixup ( file, false ) );
67 const char* p = strtok ( &s[0], "/\\" );
68 while ( p )
69 {
70 if ( *p )
71 path.push_back ( p );
72 p = strtok ( NULL, "/\\" );
73 }
74 }
75
76 string
77 Path::Fixup ( const string& file, bool include_filename ) const
78 {
79 if ( strchr ( "/\\", file[0] )
80 #ifdef WIN32
81 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
82 || file[1] == ':'
83 #endif//WIN32
84 )
85 {
86 return file;
87 }
88 vector<string> pathtmp ( path );
89 string tmp ( file );
90 const char* prev = strtok ( &tmp[0], "/\\" );
91 const char* p = strtok ( NULL, "/\\" );
92 while ( p )
93 {
94 if ( !strcmp ( prev, "." ) )
95 ; // do nothing
96 else if ( !strcmp ( prev, ".." ) )
97 {
98 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
99 #ifdef WIN32
100 if ( pathtmp.size() > 1 )
101 #else
102 if ( pathtmp.size() )
103 #endif
104 pathtmp.resize ( pathtmp.size() - 1 );
105 }
106 else
107 pathtmp.push_back ( prev );
108 prev = p;
109 p = strtok ( NULL, "/\\" );
110 }
111 if ( include_filename )
112 pathtmp.push_back ( prev );
113
114 // reuse tmp variable to return recombined path
115 tmp.resize(0);
116 for ( size_t i = 0; i < pathtmp.size(); i++ )
117 {
118 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
119 #ifdef WIN32
120 if ( i ) tmp += "/";
121 #else
122 tmp += "/";
123 #endif
124 tmp += pathtmp[i];
125 }
126 return tmp;
127 }
128
129 /*static*/ string
130 Path::RelativeFromWorkingDirectory ( const string& path )
131 {
132 vector<string> vwork, vpath, vout;
133 Path::Split ( vwork, working_directory, true );
134 Path::Split ( vpath, path, true );
135 #ifdef WIN32
136 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
137 // not possible to do relative across different drive letters
138 if ( vwork[0] != vpath[0] )
139 return path;
140 #endif
141 size_t i = 0;
142 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
143 ++i;
144 if ( i < vwork.size() )
145 {
146 // path goes above our working directory, we will need some ..'s
147 for ( size_t j = 0; j < i; j++ )
148 vout.push_back ( ".." );
149 }
150 while ( i < vpath.size() )
151 vout.push_back ( vpath[i++] );
152
153 // now merge vout into a string again
154 string out;
155 for ( i = 0; i < vout.size(); i++ )
156 {
157 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
158 #ifdef WIN32
159 if ( i ) out += "/";
160 #else
161 out += "/";
162 #endif
163 out += vout[i];
164 }
165 return out;
166 }
167
168 /*static*/ void
169 Path::Split ( vector<string>& out,
170 const string& path,
171 bool include_last )
172 {
173 string s ( path );
174 const char* prev = strtok ( &s[0], "/\\" );
175 const char* p = strtok ( NULL, "/\\" );
176 out.resize ( 0 );
177 while ( p )
178 {
179 out.push_back ( prev );
180 prev = p;
181 p = strtok ( NULL, "/\\" );
182 }
183 if ( include_last )
184 out.push_back ( prev );
185 }
186
187 XMLFile::XMLFile()
188 {
189 }
190
191 void
192 XMLFile::close()
193 {
194 while ( _f.size() )
195 {
196 fclose ( _f.back() );
197 _f.pop_back();
198 }
199 _buf.resize(0);
200 _p = _end = NULL;
201 }
202
203 bool
204 XMLFile::open(const string& filename)
205 {
206 close();
207 FILE* f = fopen ( filename.c_str(), "rb" );
208 if ( !f )
209 return false;
210 unsigned long len = (unsigned long)filelen(f);
211 _buf.resize ( len );
212 fread ( &_buf[0], 1, len, f );
213 _p = _buf.c_str();
214 _end = _p + len;
215 _f.push_back ( f );
216 next_token();
217 return true;
218 }
219
220 // next_token() moves the pointer to next token, which may be
221 // an xml element or a text element, basically it's a glorified
222 // skipspace, normally the user of this class won't need to call
223 // this function
224 void
225 XMLFile::next_token()
226 {
227 _p += strspn ( _p, WS );
228 }
229
230 bool
231 XMLFile::next_is_text()
232 {
233 return *_p != '<';
234 }
235
236 bool
237 XMLFile::more_tokens()
238 {
239 return _p != _end;
240 }
241
242 // get_token() is used to return a token, and move the pointer
243 // past the token
244 bool
245 XMLFile::get_token(string& token)
246 {
247 const char* tokend;
248 if ( !strncmp ( _p, "<!--", 4 ) )
249 {
250 tokend = strstr ( _p, "-->" );
251 if ( !tokend )
252 tokend = _end;
253 else
254 tokend += 3;
255 }
256 else if ( *_p == '<' )
257 {
258 tokend = strchr ( _p, '>' );
259 if ( !tokend )
260 tokend = _end;
261 else
262 ++tokend;
263 }
264 else
265 {
266 tokend = strchr ( _p, '<' );
267 if ( !tokend )
268 tokend = _end;
269 while ( tokend > _p && isspace(tokend[-1]) )
270 --tokend;
271 }
272 if ( tokend == _p )
273 return false;
274 token = string ( _p, tokend-_p );
275 _p = tokend;
276 next_token();
277 return true;
278 }
279
280 XMLAttribute::XMLAttribute()
281 {
282 }
283
284 XMLAttribute::XMLAttribute(const string& name_,
285 const string& value_)
286 : name(name_), value(value_)
287 {
288 }
289
290 XMLElement::XMLElement()
291 : parentElement(NULL)
292 {
293 }
294
295 XMLElement::~XMLElement()
296 {
297 size_t i;
298 for ( i = 0; i < attributes.size(); i++ )
299 delete attributes[i];
300 for ( i = 0; i < subElements.size(); i++ )
301 delete subElements[i];
302 }
303
304 void
305 XMLElement::AddSubElement ( XMLElement* e )
306 {
307 subElements.push_back ( e );
308 e->parentElement = this;
309 }
310
311 // Parse()
312 // This function takes a single xml tag ( i.e. beginning with '<' and
313 // ending with '>', and parses out it's tag name and constituent
314 // attributes.
315 // Return Value: returns true if you need to look for a </tag> for
316 // the one it just parsed...
317 bool
318 XMLElement::Parse(const string& token,
319 bool& end_tag)
320 {
321 const char* p = token.c_str();
322 assert ( *p == '<' );
323 ++p;
324 p += strspn ( p, WS );
325
326 // check if this is a comment
327 if ( !strncmp ( p, "!--", 3 ) )
328 {
329 name = "!--";
330 end_tag = false;
331 return false; // never look for end tag to a comment
332 }
333
334 end_tag = ( *p == '/' );
335 if ( end_tag )
336 {
337 ++p;
338 p += strspn ( p, WS );
339 }
340 const char* end = strpbrk ( p, WS );
341 if ( !end )
342 {
343 end = strpbrk ( p, "/>" );
344 assert ( end );
345 }
346 name = string ( p, end-p );
347 p = end;
348 p += strspn ( p, WS );
349 while ( *p != '>' && *p != '/' )
350 {
351 end = strpbrk ( p, WSEQ );
352 if ( !end )
353 {
354 end = strpbrk ( p, "/>" );
355 assert ( end );
356 }
357 string attribute ( p, end-p ), value;
358 p = end;
359 p += strspn ( p, WS );
360 if ( *p == '=' )
361 {
362 ++p;
363 p += strspn ( p, WS );
364 char quote = 0;
365 if ( strchr ( "\"'", *p ) )
366 {
367 quote = *p++;
368 end = strchr ( p, quote );
369 }
370 else
371 {
372 end = strpbrk ( p, WS );
373 }
374 if ( !end )
375 {
376 end = strchr ( p, '>' );
377 assert(end);
378 if ( end[-1] == '/' )
379 end--;
380 }
381 value = string ( p, end-p );
382 p = end;
383 if ( quote && *p == quote )
384 p++;
385 p += strspn ( p, WS );
386 }
387 attributes.push_back ( new XMLAttribute ( attribute, value ) );
388 }
389 return !( *p == '/' ) && !end_tag;
390 }
391
392 XMLAttribute*
393 XMLElement::GetAttribute ( const string& attribute,
394 bool required )
395 {
396 // this would be faster with a tree-based container, but our attribute
397 // lists are likely to stay so short as to not be an issue.
398 for ( size_t i = 0; i < attributes.size(); i++ )
399 {
400 if ( attribute == attributes[i]->name )
401 return attributes[i];
402 }
403 if ( required )
404 {
405 printf ( "syntax error: attribute '%s' required for <%s>\n",
406 attribute.c_str(), name.c_str() );
407 }
408 return NULL;
409 }
410
411 const XMLAttribute*
412 XMLElement::GetAttribute ( const string& attribute,
413 bool required ) const
414 {
415 // this would be faster with a tree-based container, but our attribute
416 // lists are likely to stay so short as to not be an issue.
417 for ( size_t i = 0; i < attributes.size(); i++ )
418 {
419 if ( attribute == attributes[i]->name )
420 return attributes[i];
421 }
422 if ( required )
423 {
424 printf ( "syntax error: attribute '%s' required for <%s>\n",
425 attribute.c_str(), name.c_str() );
426 }
427 return NULL;
428 }
429
430 // XMLParse()
431 // This function reads a "token" from the file loaded in XMLFile
432 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
433 // if it finds a tag that is non-singular, it parses sub-elements and/or
434 // inner text into the XMLElement that it is building to return.
435 // Return Value: an XMLElement allocated via the new operator that contains
436 // it's parsed data. Keep calling this function until it returns NULL
437 // (no more data)
438 XMLElement*
439 XMLParse(XMLFile& f,
440 const Path& path,
441 bool* pend_tag /*= NULL*/)
442 {
443 string token;
444 if ( !f.get_token(token) )
445 return NULL;
446 bool end_tag;
447
448 while ( token[0] != '<' )
449 {
450 printf ( "syntax error: expecting xml tag, not '%s'\n", token.c_str() );
451 if ( !f.get_token(token) )
452 return NULL;
453 }
454
455 XMLElement* e = new XMLElement;
456 bool bNeedEnd = e->Parse ( token, end_tag );
457
458 if ( e->name == "xi:include" )
459 {
460 XMLAttribute* att;
461 att = e->GetAttribute("href",true);
462 if ( att )
463 {
464 string file ( path.Fixup(att->value,true) );
465 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
466 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
467 XMLFile fInc;
468 if ( !fInc.open ( file ) )
469 printf ( "xi:include error, couldn't find file '%s'\n", file.c_str() );
470 else
471 {
472 Path path2 ( path, att->value );
473 for ( ;; )
474 {
475 XMLElement* e2 = XMLParse ( fInc, path2 );
476 if ( !e2 )
477 break;
478 e->AddSubElement ( e2 );
479 }
480 }
481 }
482 }
483
484 if ( !bNeedEnd )
485 {
486 if ( pend_tag )
487 *pend_tag = end_tag;
488 else if ( end_tag )
489 {
490 delete e;
491 printf ( "syntax error: end tag '%s' not expected\n", token.c_str() );
492 return NULL;
493 }
494 return e;
495 }
496 bool bThisMixingErrorReported = false;
497 while ( f.more_tokens() )
498 {
499 if ( f.next_is_text() )
500 {
501 if ( !f.get_token ( token ) || !token.size() )
502 {
503 printf ( "internal tool error - get_token() failed when more_tokens() returned true\n" );
504 break;
505 }
506 if ( e->subElements.size() && !bThisMixingErrorReported )
507 {
508 printf ( "syntax error: mixing of inner text with sub elements\n" );
509 bThisMixingErrorReported = true;
510 }
511 if ( e->value.size() )
512 {
513 printf ( "syntax error: multiple instances of inner text\n" );
514 e->value += " " + token;
515 }
516 else
517 e->value = token;
518 }
519 else
520 {
521 XMLElement* e2 = XMLParse ( f, path, &end_tag );
522 if ( end_tag )
523 {
524 if ( e->name != e2->name )
525 printf ( "end tag name mismatch\n" );
526 delete e2;
527 break;
528 }
529 if ( e->value.size() && !bThisMixingErrorReported )
530 {
531 printf ( "syntax error: mixing of inner text with sub elements\n" );
532 bThisMixingErrorReported = true;
533 }
534 e->AddSubElement ( e2 );
535 }
536 }
537 return e;
538 }