small XMLFile cleanup
[reactos.git] / reactos / tools / rbuild / XML.cpp
1 // XML.cpp
2
3 #include "pch.h"
4
5 #include <direct.h>
6 #include <io.h>
7 #include <assert.h>
8
9 #include "XML.h"
10
11 using std::string;
12 using std::vector;
13
14 #ifdef WIN32
15 #define getcwd _getcwd
16 #endif//WIN32
17
18 static const char* WS = " \t\r\n";
19 static const char* WSEQ = " =\t\r\n";
20
21 string working_directory;
22
23 void
24 InitWorkingDirectory()
25 {
26 // store the current directory for path calculations
27 working_directory.resize ( _MAX_PATH );
28 working_directory[0] = 0;
29 getcwd ( &working_directory[0], working_directory.size() );
30 working_directory.resize ( strlen ( working_directory.c_str() ) );
31 }
32
// filelen()
// Returns the length in bytes of the file behind the open stream f.
// Return Value: the file size; the non-Windows branches return 0 when
// the size can't be determined.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#elif defined(UNIX)
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#else
	// Neither platform macro is defined: fall back to standard C so the
	// function always returns a value. Seek to the end to learn the size,
	// then put the stream position back where the caller left it.
	const long saved = ftell ( f );
	if ( saved < 0 || fseek ( f, 0, SEEK_END ) != 0 )
		return 0;
	const long size = ftell ( f );
	fseek ( f, saved, SEEK_SET );
	if ( size < 0 )
		return 0;
	return (unsigned long)size;
#endif
}
49
50 Path::Path()
51 {
52 if ( !working_directory.size() )
53 InitWorkingDirectory();
54 string s ( working_directory );
55 const char* p = strtok ( &s[0], "/\\" );
56 while ( p )
57 {
58 if ( *p )
59 path.push_back ( p );
60 p = strtok ( NULL, "/\\" );
61 }
62 }
63
64 Path::Path ( const Path& cwd, const string& file )
65 {
66 string s ( cwd.Fixup ( file, false ) );
67 const char* p = strtok ( &s[0], "/\\" );
68 while ( p )
69 {
70 if ( *p )
71 path.push_back ( p );
72 p = strtok ( NULL, "/\\" );
73 }
74 }
75
76 string
77 Path::Fixup ( const string& file, bool include_filename ) const
78 {
79 if ( strchr ( "/\\", file[0] )
80 #ifdef WIN32
81 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
82 || file[1] == ':'
83 #endif//WIN32
84 )
85 {
86 return file;
87 }
88 vector<string> pathtmp ( path );
89 string tmp ( file );
90 const char* prev = strtok ( &tmp[0], "/\\" );
91 const char* p = strtok ( NULL, "/\\" );
92 while ( p )
93 {
94 if ( !strcmp ( prev, "." ) )
95 ; // do nothing
96 else if ( !strcmp ( prev, ".." ) )
97 {
98 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
99 #ifdef WIN32
100 if ( pathtmp.size() > 1 )
101 #else
102 if ( pathtmp.size() )
103 #endif
104 pathtmp.resize ( pathtmp.size() - 1 );
105 }
106 else
107 pathtmp.push_back ( prev );
108 prev = p;
109 p = strtok ( NULL, "/\\" );
110 }
111 if ( include_filename )
112 pathtmp.push_back ( prev );
113
114 // reuse tmp variable to return recombined path
115 tmp.resize(0);
116 for ( size_t i = 0; i < pathtmp.size(); i++ )
117 {
118 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
119 #ifdef WIN32
120 if ( i ) tmp += "/";
121 #else
122 tmp += "/";
123 #endif
124 tmp += pathtmp[i];
125 }
126 return tmp;
127 }
128
129 /*static*/ string
130 Path::RelativeFromWorkingDirectory ( const string& path )
131 {
132 vector<string> vwork, vpath, vout;
133 Path::Split ( vwork, working_directory, true );
134 Path::Split ( vpath, path, true );
135 #ifdef WIN32
136 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
137 // not possible to do relative across different drive letters
138 if ( vwork[0] != vpath[0] )
139 return path;
140 #endif
141 size_t i = 0;
142 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
143 ++i;
144 if ( i < vwork.size() )
145 {
146 // path goes above our working directory, we will need some ..'s
147 for ( size_t j = 0; j < i; j++ )
148 vout.push_back ( ".." );
149 }
150 while ( i < vpath.size() )
151 vout.push_back ( vpath[i++] );
152
153 // now merge vout into a string again
154 string out;
155 for ( i = 0; i < vout.size(); i++ )
156 {
157 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
158 #ifdef WIN32
159 if ( i ) out += "/";
160 #else
161 out += "/";
162 #endif
163 out += vout[i];
164 }
165 return out;
166 }
167
168 /*static*/ void
169 Path::Split ( vector<string>& out,
170 const string& path,
171 bool include_last )
172 {
173 string s ( path );
174 const char* prev = strtok ( &s[0], "/\\" );
175 const char* p = strtok ( NULL, "/\\" );
176 out.resize ( 0 );
177 while ( p )
178 {
179 out.push_back ( prev );
180 prev = p;
181 p = strtok ( NULL, "/\\" );
182 }
183 if ( include_last )
184 out.push_back ( prev );
185 }
186
187 XMLFile::XMLFile()
188 {
189 }
190
191 void
192 XMLFile::close()
193 {
194 _buf.resize(0);
195 _p = _end = NULL;
196 }
197
198 bool
199 XMLFile::open(const string& filename)
200 {
201 close();
202 FILE* f = fopen ( filename.c_str(), "rb" );
203 if ( !f )
204 return false;
205 unsigned long len = (unsigned long)filelen(f);
206 _buf.resize ( len );
207 fread ( &_buf[0], 1, len, f );
208 fclose ( f );
209 _p = _buf.c_str();
210 _end = _p + len;
211 next_token();
212 return true;
213 }
214
215 // next_token() moves the pointer to next token, which may be
216 // an xml element or a text element, basically it's a glorified
217 // skipspace, normally the user of this class won't need to call
218 // this function
219 void
220 XMLFile::next_token()
221 {
222 _p += strspn ( _p, WS );
223 }
224
225 bool
226 XMLFile::next_is_text()
227 {
228 return *_p != '<';
229 }
230
231 bool
232 XMLFile::more_tokens()
233 {
234 return _p != _end;
235 }
236
237 // get_token() is used to return a token, and move the pointer
238 // past the token
239 bool
240 XMLFile::get_token(string& token)
241 {
242 const char* tokend;
243 if ( !strncmp ( _p, "<!--", 4 ) )
244 {
245 tokend = strstr ( _p, "-->" );
246 if ( !tokend )
247 tokend = _end;
248 else
249 tokend += 3;
250 }
251 else if ( *_p == '<' )
252 {
253 tokend = strchr ( _p, '>' );
254 if ( !tokend )
255 tokend = _end;
256 else
257 ++tokend;
258 }
259 else
260 {
261 tokend = strchr ( _p, '<' );
262 if ( !tokend )
263 tokend = _end;
264 while ( tokend > _p && isspace(tokend[-1]) )
265 --tokend;
266 }
267 if ( tokend == _p )
268 return false;
269 token = string ( _p, tokend-_p );
270 _p = tokend;
271 next_token();
272 return true;
273 }
274
275 XMLAttribute::XMLAttribute()
276 {
277 }
278
279 XMLAttribute::XMLAttribute(const string& name_,
280 const string& value_)
281 : name(name_), value(value_)
282 {
283 }
284
285 XMLElement::XMLElement()
286 : parentElement(NULL)
287 {
288 }
289
290 XMLElement::~XMLElement()
291 {
292 size_t i;
293 for ( i = 0; i < attributes.size(); i++ )
294 delete attributes[i];
295 for ( i = 0; i < subElements.size(); i++ )
296 delete subElements[i];
297 }
298
299 void
300 XMLElement::AddSubElement ( XMLElement* e )
301 {
302 subElements.push_back ( e );
303 e->parentElement = this;
304 }
305
306 // Parse()
307 // This function takes a single xml tag ( i.e. beginning with '<' and
308 // ending with '>', and parses out it's tag name and constituent
309 // attributes.
310 // Return Value: returns true if you need to look for a </tag> for
311 // the one it just parsed...
312 bool
313 XMLElement::Parse(const string& token,
314 bool& end_tag)
315 {
316 const char* p = token.c_str();
317 assert ( *p == '<' );
318 ++p;
319 p += strspn ( p, WS );
320
321 // check if this is a comment
322 if ( !strncmp ( p, "!--", 3 ) )
323 {
324 name = "!--";
325 end_tag = false;
326 return false; // never look for end tag to a comment
327 }
328
329 end_tag = ( *p == '/' );
330 if ( end_tag )
331 {
332 ++p;
333 p += strspn ( p, WS );
334 }
335 const char* end = strpbrk ( p, WS );
336 if ( !end )
337 {
338 end = strpbrk ( p, "/>" );
339 assert ( end );
340 }
341 name = string ( p, end-p );
342 p = end;
343 p += strspn ( p, WS );
344 while ( *p != '>' && *p != '/' )
345 {
346 end = strpbrk ( p, WSEQ );
347 if ( !end )
348 {
349 end = strpbrk ( p, "/>" );
350 assert ( end );
351 }
352 string attribute ( p, end-p ), value;
353 p = end;
354 p += strspn ( p, WS );
355 if ( *p == '=' )
356 {
357 ++p;
358 p += strspn ( p, WS );
359 char quote = 0;
360 if ( strchr ( "\"'", *p ) )
361 {
362 quote = *p++;
363 end = strchr ( p, quote );
364 }
365 else
366 {
367 end = strpbrk ( p, WS );
368 }
369 if ( !end )
370 {
371 end = strchr ( p, '>' );
372 assert(end);
373 if ( end[-1] == '/' )
374 end--;
375 }
376 value = string ( p, end-p );
377 p = end;
378 if ( quote && *p == quote )
379 p++;
380 p += strspn ( p, WS );
381 }
382 attributes.push_back ( new XMLAttribute ( attribute, value ) );
383 }
384 return !( *p == '/' ) && !end_tag;
385 }
386
387 XMLAttribute*
388 XMLElement::GetAttribute ( const string& attribute,
389 bool required )
390 {
391 // this would be faster with a tree-based container, but our attribute
392 // lists are likely to stay so short as to not be an issue.
393 for ( size_t i = 0; i < attributes.size(); i++ )
394 {
395 if ( attribute == attributes[i]->name )
396 return attributes[i];
397 }
398 if ( required )
399 {
400 printf ( "syntax error: attribute '%s' required for <%s>\n",
401 attribute.c_str(), name.c_str() );
402 }
403 return NULL;
404 }
405
406 const XMLAttribute*
407 XMLElement::GetAttribute ( const string& attribute,
408 bool required ) const
409 {
410 // this would be faster with a tree-based container, but our attribute
411 // lists are likely to stay so short as to not be an issue.
412 for ( size_t i = 0; i < attributes.size(); i++ )
413 {
414 if ( attribute == attributes[i]->name )
415 return attributes[i];
416 }
417 if ( required )
418 {
419 printf ( "syntax error: attribute '%s' required for <%s>\n",
420 attribute.c_str(), name.c_str() );
421 }
422 return NULL;
423 }
424
425 // XMLParse()
426 // This function reads a "token" from the file loaded in XMLFile
427 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
428 // if it finds a tag that is non-singular, it parses sub-elements and/or
429 // inner text into the XMLElement that it is building to return.
430 // Return Value: an XMLElement allocated via the new operator that contains
431 // it's parsed data. Keep calling this function until it returns NULL
432 // (no more data)
433 XMLElement*
434 XMLParse(XMLFile& f,
435 const Path& path,
436 bool* pend_tag /*= NULL*/)
437 {
438 string token;
439 if ( !f.get_token(token) )
440 return NULL;
441 bool end_tag;
442
443 while ( token[0] != '<' || !strncmp ( token.c_str(), "<!--", 4 ) )
444 {
445 if ( token[0] != '<' )
446 printf ( "syntax error: expecting xml tag, not '%s'\n", token.c_str() );
447 if ( !f.get_token(token) )
448 return NULL;
449 }
450
451 XMLElement* e = new XMLElement;
452 bool bNeedEnd = e->Parse ( token, end_tag );
453
454 if ( e->name == "xi:include" )
455 {
456 XMLAttribute* att;
457 att = e->GetAttribute("href",true);
458 if ( att )
459 {
460 string file ( path.Fixup(att->value,true) );
461 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
462 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
463 XMLFile fInc;
464 if ( !fInc.open ( file ) )
465 printf ( "xi:include error, couldn't find file '%s'\n", file.c_str() );
466 else
467 {
468 Path path2 ( path, att->value );
469 for ( ;; )
470 {
471 XMLElement* e2 = XMLParse ( fInc, path2 );
472 if ( !e2 )
473 break;
474 e->AddSubElement ( e2 );
475 }
476 }
477 }
478 }
479
480 if ( !bNeedEnd )
481 {
482 if ( pend_tag )
483 *pend_tag = end_tag;
484 else if ( end_tag )
485 {
486 delete e;
487 printf ( "syntax error: end tag '%s' not expected\n", token.c_str() );
488 return NULL;
489 }
490 return e;
491 }
492 bool bThisMixingErrorReported = false;
493 while ( f.more_tokens() )
494 {
495 if ( f.next_is_text() )
496 {
497 if ( !f.get_token ( token ) || !token.size() )
498 {
499 printf ( "internal tool error - get_token() failed when more_tokens() returned true\n" );
500 break;
501 }
502 if ( e->subElements.size() && !bThisMixingErrorReported )
503 {
504 printf ( "syntax error: mixing of inner text with sub elements\n" );
505 bThisMixingErrorReported = true;
506 }
507 if ( e->value.size() )
508 {
509 printf ( "syntax error: multiple instances of inner text\n" );
510 e->value += " " + token;
511 }
512 else
513 e->value = token;
514 }
515 else
516 {
517 XMLElement* e2 = XMLParse ( f, path, &end_tag );
518 if ( end_tag )
519 {
520 if ( e->name != e2->name )
521 printf ( "end tag name mismatch\n" );
522 delete e2;
523 break;
524 }
525 if ( e->value.size() && !bThisMixingErrorReported )
526 {
527 printf ( "syntax error: mixing of inner text with sub elements\n" );
528 bThisMixingErrorReported = true;
529 }
530 e->AddSubElement ( e2 );
531 }
532 }
533 return e;
534 }