Handle XML comments and add handling for some possible error conditions
[reactos.git] / reactos / tools / rbuild / rbuild.cpp
1 // rbuild.cpp
2
3 #ifdef _MSC_VER
4 #pragma warning ( disable : 4786 ) // identifier was truncated to '255' characters in the debug information
5 #endif//_MSC_VER
6
7 #include <stdio.h>
8 #include <io.h>
9 #include <assert.h>
10 #include <direct.h>
11 #include "rbuild.h"
12
13 using std::string;
14 using std::vector;
15
16 #ifdef WIN32
17 #define getcwd _getcwd
18 #endif//WIN32
19 string working_directory;
20
// filelen() reports the size, in bytes, of the file behind the stdio
// stream f.
//   WIN32: asks the C runtime via _filelengthi64().
//   UNIX:  asks the kernel via fstat64(); returns 0 if that fails.
//   other: falls back to seeking to the end of the stream and reading the
//          offset with ftell(); the original stream position is restored
//          afterwards.  Returns 0 if the stream is not seekable.
// Previously the "other" case fell off the end of the function without
// returning a value.
#ifdef _MSC_VER
unsigned __int64
#else
unsigned long long
#endif
filelen ( FILE* f )
{
#ifdef WIN32
	return _filelengthi64 ( _fileno(f) );
#elif defined(UNIX)
	struct stat64 file_stat;
	if ( fstat64(fileno(f), &file_stat) != 0 )
		return 0;
	return file_stat.st_size;
#else
	// remember where we are so the caller's read position is untouched
	const long saved = ftell ( f );
	if ( saved < 0 )
		return 0;
	if ( fseek ( f, 0, SEEK_END ) != 0 )
		return 0;
	const long size = ftell ( f );
	fseek ( f, saved, SEEK_SET );
	if ( size < 0 )
		return 0;
	return static_cast<unsigned long> ( size );
#endif
}
37
// Character sets used by the tokenizer and the tag parser below.
// WS   - the characters treated as whitespace.
// WSEQ - whitespace plus '=', i.e. everything that ends an attribute name.
// Both the characters and the pointers themselves are const so the sets
// cannot be redirected by accident.
static const char* const WS = " \t\r\n";
static const char* const WSEQ = " =\t\r\n";
40
41 Path::Path()
42 {
43 string s ( working_directory );
44 const char* p = strtok ( &s[0], "/\\" );
45 while ( p )
46 {
47 if ( *p )
48 path.push_back ( p );
49 p = strtok ( NULL, "/\\" );
50 }
51 }
52
53 Path::Path ( const Path& cwd, const string& file )
54 {
55 string s ( cwd.Fixup ( file, false ) );
56 const char* p = strtok ( &s[0], "/\\" );
57 while ( p )
58 {
59 if ( *p )
60 path.push_back ( p );
61 p = strtok ( NULL, "/\\" );
62 }
63 }
64
65 string
66 Path::Fixup ( const string& file, bool include_filename ) const
67 {
68 if ( strchr ( "/\\", file[0] )
69 #ifdef WIN32
70 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
71 || file[1] == ':'
72 #endif//WIN32
73 )
74 {
75 return file;
76 }
77 vector<string> pathtmp ( path );
78 string tmp ( file );
79 const char* prev = strtok ( &tmp[0], "/\\" );
80 const char* p = strtok ( NULL, "/\\" );
81 while ( p )
82 {
83 if ( !strcmp ( prev, "." ) )
84 ; // do nothing
85 else if ( !strcmp ( prev, ".." ) )
86 {
87 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
88 #ifdef WIN32
89 if ( pathtmp.size() > 1 )
90 #else
91 if ( pathtmp.size() )
92 #endif
93 pathtmp.resize ( pathtmp.size() - 1 );
94 }
95 else
96 pathtmp.push_back ( prev );
97 prev = p;
98 p = strtok ( NULL, "/\\" );
99 }
100 if ( include_filename )
101 pathtmp.push_back ( prev );
102
103 // reuse tmp variable to return recombined path
104 tmp.resize(0);
105 for ( size_t i = 0; i < pathtmp.size(); i++ )
106 {
107 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
108 #ifdef WIN32
109 if ( i ) tmp += "/";
110 #else
111 tmp += "/";
112 #endif
113 tmp += pathtmp[i];
114 }
115 return tmp;
116 }
117
118 /*static*/ string
119 Path::RelativeFromWorkingDirectory ( const string& path )
120 {
121 vector<string> vwork, vpath, vout;
122 Path::Split ( vwork, working_directory, true );
123 Path::Split ( vpath, path, true );
124 #ifdef WIN32
125 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
126 // not possible to do relative across different drive letters
127 if ( vwork[0] != vpath[0] )
128 return path;
129 #endif
130 size_t i = 0;
131 while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
132 ++i;
133 if ( i < vwork.size() )
134 {
135 // path goes above our working directory, we will need some ..'s
136 for ( int j = 0; j < i; j++ )
137 vout.push_back ( ".." );
138 }
139 while ( i < vpath.size() )
140 vout.push_back ( vpath[i++] );
141
142 // now merge vout into a string again
143 string out;
144 for ( i = 0; i < vout.size(); i++ )
145 {
146 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
147 #ifdef WIN32
148 if ( i ) out += "/";
149 #else
150 out += "/";
151 #endif
152 out += vout[i];
153 }
154 return out;
155 }
156
157 /*static*/ void
158 Path::Split ( vector<string>& out,
159 const string& path,
160 bool include_last )
161 {
162 string s ( path );
163 const char* prev = strtok ( &s[0], "/\\" );
164 const char* p = strtok ( NULL, "/\\" );
165 out.resize ( 0 );
166 while ( p )
167 {
168 out.push_back ( prev );
169 prev = p;
170 p = strtok ( NULL, "/\\" );
171 }
172 if ( include_last )
173 out.push_back ( prev );
174 }
175
176 XMLFile::XMLFile()
177 {
178 }
179
180 void
181 XMLFile::close()
182 {
183 while ( _f.size() )
184 {
185 fclose ( _f.back() );
186 _f.pop_back();
187 }
188 _buf.resize(0);
189 _p = _end = NULL;
190 }
191
192 bool
193 XMLFile::open(const string& filename)
194 {
195 close();
196 FILE* f = fopen ( filename.c_str(), "rb" );
197 if ( !f )
198 return false;
199 unsigned long len = (unsigned long)filelen(f);
200 _buf.resize ( len );
201 fread ( &_buf[0], 1, len, f );
202 _p = _buf.c_str();
203 _end = _p + len;
204 _f.push_back ( f );
205 next_token();
206 return true;
207 }
208
209 // next_token() moves the pointer to next token, which may be
210 // an xml element or a text element, basically it's a glorified
211 // skipspace, normally the user of this class won't need to call
212 // this function
213 void
214 XMLFile::next_token()
215 {
216 _p += strspn ( _p, WS );
217 }
218
219 bool
220 XMLFile::next_is_text()
221 {
222 return *_p != '<';
223 }
224
225 bool
226 XMLFile::more_tokens()
227 {
228 return _p != _end;
229 }
230
231 // get_token() is used to return a token, and move the pointer
232 // past the token
233 bool
234 XMLFile::get_token(string& token)
235 {
236 const char* tokend;
237 if ( !strncmp ( _p, "<!--", 4 ) )
238 {
239 tokend = strstr ( _p, "-->" );
240 if ( !tokend )
241 tokend = _end;
242 else
243 tokend += 3;
244 }
245 else if ( *_p == '<' )
246 {
247 tokend = strchr ( _p, '>' );
248 if ( !tokend )
249 tokend = _end;
250 else
251 ++tokend;
252 }
253 else
254 {
255 tokend = strchr ( _p, '<' );
256 if ( !tokend )
257 tokend = _end;
258 while ( tokend > _p && isspace(tokend[-1]) )
259 --tokend;
260 }
261 if ( tokend == _p )
262 return false;
263 token = string ( _p, tokend-_p );
264 _p = tokend;
265 next_token();
266 return true;
267 }
268
269 XMLAttribute::XMLAttribute()
270 {
271 }
272
273 XMLAttribute::XMLAttribute(const string& name_,
274 const string& value_)
275 : name(name_), value(value_)
276 {
277 }
278
279 XMLElement::XMLElement()
280 : parentElement(NULL)
281 {
282 }
283
284 XMLElement::~XMLElement()
285 {
286 size_t i;
287 for ( i = 0; i < attributes.size(); i++ )
288 delete attributes[i];
289 for ( i = 0; i < subElements.size(); i++ )
290 delete subElements[i];
291 }
292
293 void
294 XMLElement::AddSubElement ( XMLElement* e )
295 {
296 subElements.push_back ( e );
297 e->parentElement = this;
298 }
299
300 // Parse()
301 // This function takes a single xml tag ( i.e. beginning with '<' and
302 // ending with '>', and parses out it's tag name and constituent
303 // attributes.
304 // Return Value: returns true if you need to look for a </tag> for
305 // the one it just parsed...
306 bool
307 XMLElement::Parse(const string& token,
308 bool& end_tag)
309 {
310 const char* p = token.c_str();
311 assert ( *p == '<' );
312 ++p;
313 p += strspn ( p, WS );
314
315 // check if this is a comment
316 if ( !strncmp ( p, "!--", 3 ) )
317 {
318 name = "!--";
319 end_tag = false;
320 return false; // never look for end tag to a comment
321 }
322
323 end_tag = ( *p == '/' );
324 if ( end_tag )
325 {
326 ++p;
327 p += strspn ( p, WS );
328 }
329 const char* end = strpbrk ( p, WS );
330 if ( !end )
331 {
332 end = strpbrk ( p, "/>" );
333 assert ( end );
334 }
335 name = string ( p, end-p );
336 p = end;
337 p += strspn ( p, WS );
338 while ( *p != '>' && *p != '/' )
339 {
340 end = strpbrk ( p, WSEQ );
341 if ( !end )
342 {
343 end = strpbrk ( p, "/>" );
344 assert ( end );
345 }
346 string attribute ( p, end-p ), value;
347 p = end;
348 p += strspn ( p, WS );
349 if ( *p == '=' )
350 {
351 ++p;
352 p += strspn ( p, WS );
353 char quote = 0;
354 if ( strchr ( "\"'", *p ) )
355 {
356 quote = *p++;
357 end = strchr ( p, quote );
358 }
359 else
360 {
361 end = strpbrk ( p, WS );
362 }
363 if ( !end )
364 {
365 end = strchr ( p, '>' );
366 assert(end);
367 if ( end[-1] == '/' )
368 end--;
369 }
370 value = string ( p, end-p );
371 p = end;
372 if ( quote && *p == quote )
373 p++;
374 p += strspn ( p, WS );
375 }
376 attributes.push_back ( new XMLAttribute ( attribute, value ) );
377 }
378 return !( *p == '/' ) && !end_tag;
379 }
380
381 XMLAttribute*
382 XMLElement::GetAttribute ( const string& attribute,
383 bool required )
384 {
385 // this would be faster with a tree-based container, but our attribute
386 // lists are likely to stay so short as to not be an issue.
387 for ( int i = 0; i < attributes.size(); i++ )
388 {
389 if ( attribute == attributes[i]->name )
390 return attributes[i];
391 }
392 if ( required )
393 {
394 printf ( "syntax error: attribute '%s' required for <%s>\n",
395 attribute.c_str(), name.c_str() );
396 }
397 return NULL;
398 }
399
400 const XMLAttribute*
401 XMLElement::GetAttribute ( const string& attribute,
402 bool required ) const
403 {
404 // this would be faster with a tree-based container, but our attribute
405 // lists are likely to stay so short as to not be an issue.
406 for ( int i = 0; i < attributes.size(); i++ )
407 {
408 if ( attribute == attributes[i]->name )
409 return attributes[i];
410 }
411 if ( required )
412 {
413 printf ( "syntax error: attribute '%s' required for <%s>\n",
414 attribute.c_str(), name.c_str() );
415 }
416 return NULL;
417 }
418
419 // XMLParse()
420 // This function reads a "token" from the file loaded in XMLFile
421 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
422 // if it finds a tag that is non-singular, it parses sub-elements and/or
423 // inner text into the XMLElement that it is building to return.
424 // Return Value: an XMLElement allocated via the new operator that contains
425 // it's parsed data. Keep calling this function until it returns NULL
426 // (no more data)
427 XMLElement*
428 XMLParse(XMLFile& f,
429 const Path& path,
430 bool* pend_tag = NULL)
431 {
432 string token;
433 if ( !f.get_token(token) )
434 return NULL;
435 bool end_tag;
436
437 while ( token[0] != '<' )
438 {
439 printf ( "syntax error: expecting xml tag, not '%s'\n", token.c_str() );
440 if ( !f.get_token(token) )
441 return NULL;
442 }
443
444 XMLElement* e = new XMLElement;
445 bool bNeedEnd = e->Parse ( token, end_tag );
446
447 if ( e->name == "xi:include" )
448 {
449 XMLAttribute* att;
450 att = e->GetAttribute("href",true);
451 if ( att )
452 {
453 string file ( path.Fixup(att->value,true) );
454 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
455 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
456 XMLFile fInc;
457 if ( !fInc.open ( file ) )
458 printf ( "xi:include error, couldn't find file '%s'\n", file.c_str() );
459 else
460 {
461 Path path2 ( path, att->value );
462 for ( ;; )
463 {
464 XMLElement* e2 = XMLParse ( fInc, path2 );
465 if ( !e2 )
466 break;
467 e->AddSubElement ( e2 );
468 }
469 }
470 }
471 }
472
473 if ( !bNeedEnd )
474 {
475 if ( pend_tag )
476 *pend_tag = end_tag;
477 else if ( end_tag )
478 {
479 delete e;
480 printf ( "syntax error: end tag '%s' not expected\n", token.c_str() );
481 return NULL;
482 }
483 return e;
484 }
485 bool bThisMixingErrorReported = false;
486 while ( f.more_tokens() )
487 {
488 if ( f.next_is_text() )
489 {
490 if ( !f.get_token ( token ) || !token.size() )
491 {
492 printf ( "internal tool error - get_token() failed when more_tokens() returned true\n" );
493 break;
494 }
495 if ( e->subElements.size() && !bThisMixingErrorReported )
496 {
497 printf ( "syntax error: mixing of inner text with sub elements\n" );
498 bThisMixingErrorReported = true;
499 }
500 if ( e->value.size() )
501 {
502 printf ( "syntax error: multiple instances of inner text\n" );
503 e->value += " " + token;
504 }
505 else
506 e->value = token;
507 }
508 else
509 {
510 XMLElement* e2 = XMLParse ( f, path, &end_tag );
511 if ( end_tag )
512 {
513 if ( e->name != e2->name )
514 printf ( "end tag name mismatch\n" );
515 delete e2;
516 break;
517 }
518 if ( e->value.size() && !bThisMixingErrorReported )
519 {
520 printf ( "syntax error: mixing of inner text with sub elements\n" );
521 bThisMixingErrorReported = true;
522 }
523 e->AddSubElement ( e2 );
524 }
525 }
526 return e;
527 }
528
529 Project::~Project()
530 {
531 for ( size_t i = 0; i < modules.size(); i++ )
532 delete modules[i];
533 }
534
535 void
536 Project::ProcessXML ( const XMLElement& e, const string& path )
537 {
538 const XMLAttribute *att;
539 string subpath(path);
540 if ( e.name == "project" )
541 {
542 att = e.GetAttribute ( "name", false );
543 if ( !att )
544 name = "Unnamed";
545 else
546 name = att->value;
547 }
548 else if ( e.name == "module" )
549 {
550 att = e.GetAttribute ( "name", true );
551 if ( !att )
552 return;
553 Module* module = new Module ( e, att->value, path );
554 modules.push_back ( module );
555 return; // REM TODO FIXME no processing of modules... yet
556 }
557 else if ( e.name == "directory" )
558 {
559 const XMLAttribute* att = e.GetAttribute ( "name", true );
560 if ( !att )
561 return;
562 subpath = path + "/" + att->value;
563 }
564 for ( size_t i = 0; i < e.subElements.size(); i++ )
565 ProcessXML ( *e.subElements[i], subpath );
566 }
567
568 int
569 main ( int argc, char** argv )
570 {
571 // store the current directory for path calculations
572 working_directory.resize ( _MAX_PATH );
573 working_directory[0] = 0;
574 getcwd ( &working_directory[0], working_directory.size() );
575 working_directory.resize ( strlen ( working_directory.c_str() ) );
576
577 XMLFile f;
578 Path path;
579 string xml_file ( "ReactOS.xml" );
580 if ( !f.open ( xml_file ) )
581 {
582 printf ( "couldn't open ReactOS.xml!\n" );
583 return -1;
584 }
585
586 vector<string> xml_dependencies;
587 xml_dependencies.push_back ( xml_file );
588 for ( ;; )
589 {
590 XMLElement* head = XMLParse ( f, path );
591 if ( !head )
592 break; // end of file
593
594 if ( head->name == "!--" )
595 continue; // ignore comments
596
597 if ( head->name != "project" )
598 {
599 printf ( "error: expecting 'project', got '%s'\n", head->name.c_str() );
600 continue;
601 }
602
603 Project* proj = new Project;
604 proj->ProcessXML ( *head, "." );
605
606 // REM TODO FIXME actually do something with Project object...
607 printf ( "Found %lu modules:\n", proj->modules.size() );
608 for ( size_t i = 0; i < proj->modules.size(); i++ )
609 {
610 Module& m = *proj->modules[i];
611 printf ( "\t%s in folder: %s\n",
612 m.name.c_str(),
613 m.path.c_str() );
614 }
615
616 delete proj;
617 delete head;
618 }
619
620 return 0;
621 }