reactos/tools/rbuild/XML.cpp

   1 // XML.cpp
   2
   3 #include "pch.h"
   4
   5 #include <direct.h>
   6 #include <io.h>
   7 #include <assert.h>
   8
   9 #include "XML.h"
  10
  11 using std::string;
  12 using std::vector;
  13
  14 #ifdef WIN32
  15 #define getcwd _getcwd
  16 #endif//WIN32
  17
  18 static const char* WS = " \t\r\n";
  19 static const char* WSEQ = " =\t\r\n";
  20
  21 string working_directory;
  22
  23 void
  24 InitWorkingDirectory()
  25 {
  26         // store the current directory for path calculations
  27         working_directory.resize ( _MAX_PATH );
  28         working_directory[0] = 0;
  29         getcwd ( &working_directory[0], working_directory.size() );
  30         working_directory.resize ( strlen ( working_directory.c_str() ) );
  31 }
  32
  33 #ifdef _MSC_VER
  34 unsigned __int64
  35 #else
  36 unsigned long long
  37 #endif
  38 filelen ( FILE* f )
  39 {
  40 #ifdef WIN32
  41         return _filelengthi64 ( _fileno(f) );
  42 #elif defined(UNIX)
  43         struct stat64 file_stat;
  44         if ( fstat64(fileno(f), &file_stat) != 0 )
  45                 return 0;
  46         return file_stat.st_size;
  47 #endif
  48 }
  49
  50 Path::Path()
  51 {
  52         if ( !working_directory.size() )
  53                 InitWorkingDirectory();
  54         string s ( working_directory );
  55         const char* p = strtok ( &s[0], "/\\" );
  56         while ( p )
  57         {
  58                 if ( *p )
  59                         path.push_back ( p );
  60                 p = strtok ( NULL, "/\\" );
  61         }
  62 }
  63
  64 Path::Path ( const Path& cwd, const string& file )
  65 {
  66         string s ( cwd.Fixup ( file, false ) );
  67         const char* p = strtok ( &s[0], "/\\" );
  68         while ( p )
  69         {
  70                 if ( *p )
  71                         path.push_back ( p );
  72                 p = strtok ( NULL, "/\\" );
  73         }
  74 }
  75
  76 string
  77 Path::Fixup ( const string& file, bool include_filename ) const
  78 {
  79         if ( strchr ( "/\\", file[0] )
  80 #ifdef WIN32
  81                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
  82                 || file[1] == ':'
  83 #endif//WIN32
  84                 )
  85         {
  86                 return file;
  87         }
  88         vector<string> pathtmp ( path );
  89         string tmp ( file );
  90         const char* prev = strtok ( &tmp[0], "/\\" );
  91         const char* p = strtok ( NULL, "/\\" );
  92         while ( p )
  93         {
  94                 if ( !strcmp ( prev, "." ) )
  95                         ; // do nothing
  96                 else if ( !strcmp ( prev, ".." ) )
  97                 {
  98                         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
  99 #ifdef WIN32
 100                         if ( pathtmp.size() > 1 )
 101 #else
 102                         if ( pathtmp.size() )
 103 #endif
 104                                 pathtmp.resize ( pathtmp.size() - 1 );
 105                 }
 106                 else
 107                         pathtmp.push_back ( prev );
 108                 prev = p;
 109                 p = strtok ( NULL, "/\\" );
 110         }
 111         if ( include_filename )
 112                 pathtmp.push_back ( prev );
 113
 114         // reuse tmp variable to return recombined path
 115         tmp.resize(0);
 116         for ( size_t i = 0; i < pathtmp.size(); i++ )
 117         {
 118                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 119 #ifdef WIN32
 120                 if ( i ) tmp += "/";
 121 #else
 122                 tmp += "/";
 123 #endif
 124                 tmp += pathtmp[i];
 125         }
 126         return tmp;
 127 }
 128
 129 /*static*/ string
 130 Path::RelativeFromWorkingDirectory ( const string& path )
 131 {
 132         vector<string> vwork, vpath, vout;
 133         Path::Split ( vwork, working_directory, true );
 134         Path::Split ( vpath, path, true );
 135 #ifdef WIN32
 136         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 137         // not possible to do relative across different drive letters
 138         if ( vwork[0] != vpath[0] )
 139                 return path;
 140 #endif
 141         size_t i = 0;
 142         while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
 143                 ++i;
 144         if ( i < vwork.size() )
 145         {
 146                 // path goes above our working directory, we will need some ..'s
 147                 for ( size_t j = 0; j < i; j++ )
 148                         vout.push_back ( ".." );
 149         }
 150         while ( i < vpath.size() )
 151                 vout.push_back ( vpath[i++] );
 152
 153         // now merge vout into a string again
 154         string out;
 155         for ( i = 0; i < vout.size(); i++ )
 156         {
 157                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 158 #ifdef WIN32
 159                 if ( i ) out += "/";
 160 #else
 161                 out += "/";
 162 #endif
 163                 out += vout[i];
 164         }
 165         return out;
 166 }
 167
 168 /*static*/ void
 169 Path::Split ( vector<string>& out,
 170               const string& path,
 171               bool include_last )
 172 {
 173         string s ( path );
 174         const char* prev = strtok ( &s[0], "/\\" );
 175         const char* p = strtok ( NULL, "/\\" );
 176         out.resize ( 0 );
 177         while ( p )
 178         {
 179                 out.push_back ( prev );
 180                 prev = p;
 181                 p = strtok ( NULL, "/\\" );
 182         }
 183         if ( include_last )
 184                 out.push_back ( prev );
 185 }
 186
 187 XMLFile::XMLFile()
 188 {
 189 }
 190
 191 void
 192 XMLFile::close()
 193 {
 194         _buf.resize(0);
 195         _p = _end = NULL;
 196 }
 197
 198 bool
 199 XMLFile::open(const string& filename)
 200 {
 201         close();
 202         FILE* f = fopen ( filename.c_str(), "rb" );
 203         if ( !f )
 204                 return false;
 205         unsigned long len = (unsigned long)filelen(f);
 206         _buf.resize ( len );
 207         fread ( &_buf[0], 1, len, f );
 208         fclose ( f );
 209         _p = _buf.c_str();
 210         _end = _p + len;
 211         next_token();
 212         return true;
 213 }
 214
 215 // next_token() moves the pointer to next token, which may be
 216 // an xml element or a text element, basically it's a glorified
 217 // skipspace, normally the user of this class won't need to call
 218 // this function
 219 void
 220 XMLFile::next_token()
 221 {
 222         _p += strspn ( _p, WS );
 223 }
 224
 225 bool
 226 XMLFile::next_is_text()
 227 {
 228         return *_p != '<';
 229 }
 230
 231 bool
 232 XMLFile::more_tokens()
 233 {
 234         return _p != _end;
 235 }
 236
 237 // get_token() is used to return a token, and move the pointer
 238 // past the token
 239 bool
 240 XMLFile::get_token(string& token)
 241 {
 242         const char* tokend;
 243         if ( !strncmp ( _p, "<!--", 4 ) )
 244         {
 245                 tokend = strstr ( _p, "-->" );
 246                 if ( !tokend )
 247                         tokend = _end;
 248                 else
 249                         tokend += 3;
 250         }
 251         else if ( *_p == '<' )
 252         {
 253                 tokend = strchr ( _p, '>' );
 254                 if ( !tokend )
 255                         tokend = _end;
 256                 else
 257                         ++tokend;
 258         }
 259         else
 260         {
 261                 tokend = strchr ( _p, '<' );
 262                 if ( !tokend )
 263                         tokend = _end;
 264                 while ( tokend > _p && isspace(tokend[-1]) )
 265                         --tokend;
 266         }
 267         if ( tokend == _p )
 268                 return false;
 269         token = string ( _p, tokend-_p );
 270         _p = tokend;
 271         next_token();
 272         return true;
 273 }
 274
 275 XMLAttribute::XMLAttribute()
 276 {
 277 }
 278
 279 XMLAttribute::XMLAttribute(const string& name_,
 280                            const string& value_)
 281         : name(name_), value(value_)
 282 {
 283 }
 284
 285 XMLElement::XMLElement()
 286         : parentElement(NULL)
 287 {
 288 }
 289
 290 XMLElement::~XMLElement()
 291 {
 292         size_t i;
 293         for ( i = 0; i < attributes.size(); i++ )
 294                 delete attributes[i];
 295         for ( i = 0; i < subElements.size(); i++ )
 296                 delete subElements[i];
 297 }
 298
 299 void
 300 XMLElement::AddSubElement ( XMLElement* e )
 301 {
 302         subElements.push_back ( e );
 303         e->parentElement = this;
 304 }
 305
 306 // Parse()
 307 // This function takes a single xml tag ( i.e. beginning with '<' and
 308 // ending with '>', and parses out it's tag name and constituent
 309 // attributes.
 310 // Return Value: returns true if you need to look for a </tag> for
 311 // the one it just parsed...
 312 bool
 313 XMLElement::Parse(const string& token,
 314                   bool& end_tag)
 315 {
 316         const char* p = token.c_str();
 317         assert ( *p == '<' );
 318         ++p;
 319         p += strspn ( p, WS );
 320
 321         // check if this is a comment
 322         if ( !strncmp ( p, "!--", 3 ) )
 323         {
 324                 name = "!--";
 325                 end_tag = false;
 326                 return false; // never look for end tag to a comment
 327         }
 328
 329         end_tag = ( *p == '/' );
 330         if ( end_tag )
 331         {
 332                 ++p;
 333                 p += strspn ( p, WS );
 334         }
 335         const char* end = strpbrk ( p, WS );
 336         if ( !end )
 337         {
 338                 end = strpbrk ( p, "/>" );
 339                 assert ( end );
 340         }
 341         name = string ( p, end-p );
 342         p = end;
 343         p += strspn ( p, WS );
 344         while ( *p != '>' && *p != '/' )
 345         {
 346                 end = strpbrk ( p, WSEQ );
 347                 if ( !end )
 348                 {
 349                         end = strpbrk ( p, "/>" );
 350                         assert ( end );
 351                 }
 352                 string attribute ( p, end-p ), value;
 353                 p = end;
 354                 p += strspn ( p, WS );
 355                 if ( *p == '=' )
 356                 {
 357                         ++p;
 358                         p += strspn ( p, WS );
 359                         char quote = 0;
 360                         if ( strchr ( "\"'", *p ) )
 361                         {
 362                                 quote = *p++;
 363                                 end = strchr ( p, quote );
 364                         }
 365                         else
 366                         {
 367                                 end = strpbrk ( p, WS );
 368                         }
 369                         if ( !end )
 370                         {
 371                                 end = strchr ( p, '>' );
 372                                 assert(end);
 373                                 if ( end[-1] == '/' )
 374                                         end--;
 375                         }
 376                         value = string ( p, end-p );
 377                         p = end;
 378                         if ( quote && *p == quote )
 379                                 p++;
 380                         p += strspn ( p, WS );
 381                 }
 382                 attributes.push_back ( new XMLAttribute ( attribute, value ) );
 383         }
 384         return !( *p == '/' ) && !end_tag;
 385 }
 386
 387 XMLAttribute*
 388 XMLElement::GetAttribute ( const string& attribute,
 389                            bool required )
 390 {
 391         // this would be faster with a tree-based container, but our attribute
 392         // lists are likely to stay so short as to not be an issue.
 393         for ( size_t i = 0; i < attributes.size(); i++ )
 394         {
 395                 if ( attribute == attributes[i]->name )
 396                         return attributes[i];
 397         }
 398         if ( required )
 399         {
 400                 printf ( "syntax error: attribute '%s' required for <%s>\n",
 401                         attribute.c_str(), name.c_str() );
 402         }
 403         return NULL;
 404 }
 405
 406 const XMLAttribute*
 407 XMLElement::GetAttribute ( const string& attribute,
 408                            bool required ) const
 409 {
 410         // this would be faster with a tree-based container, but our attribute
 411         // lists are likely to stay so short as to not be an issue.
 412         for ( size_t i = 0; i < attributes.size(); i++ )
 413         {
 414                 if ( attribute == attributes[i]->name )
 415                         return attributes[i];
 416         }
 417         if ( required )
 418         {
 419                 printf ( "syntax error: attribute '%s' required for <%s>\n",
 420                         attribute.c_str(), name.c_str() );
 421         }
 422         return NULL;
 423 }
 424
 425 // XMLParse()
 426 // This function reads a "token" from the file loaded in XMLFile
 427 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
 428 // if it finds a tag that is non-singular, it parses sub-elements and/or
 429 // inner text into the XMLElement that it is building to return.
 430 // Return Value: an XMLElement allocated via the new operator that contains
 431 // it's parsed data. Keep calling this function until it returns NULL
 432 // (no more data)
 433 XMLElement*
 434 XMLParse(XMLFile& f,
 435          const Path& path,
 436          bool* pend_tag /*= NULL*/)
 437 {
 438         string token;
 439         if ( !f.get_token(token) )
 440                 return NULL;
 441         bool end_tag;
 442
 443         while ( token[0] != '<' || !strncmp ( token.c_str(), "<!--", 4 ) )
 444         {
 445                 if ( token[0] != '<' )
 446                         printf ( "syntax error: expecting xml tag, not '%s'\n", token.c_str() );
 447                 if ( !f.get_token(token) )
 448                         return NULL;
 449         }
 450
 451         XMLElement* e = new XMLElement;
 452         bool bNeedEnd = e->Parse ( token, end_tag );
 453
 454         if ( e->name == "xi:include" )
 455         {
 456                 XMLAttribute* att;
 457                 att = e->GetAttribute("href",true);
 458                 if ( att )
 459                 {
 460                         string file ( path.Fixup(att->value,true) );
 461                         string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
 462                         e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
 463                         XMLFile fInc;
 464                         if ( !fInc.open ( file ) )
 465                                 printf ( "xi:include error, couldn't find file '%s'\n", file.c_str() );
 466                         else
 467                         {
 468                                 Path path2 ( path, att->value );
 469                                 for ( ;; )
 470                                 {
 471                                         XMLElement* e2 = XMLParse ( fInc, path2 );
 472                                         if ( !e2 )
 473                                                 break;
 474                                         e->AddSubElement ( e2 );
 475                                 }
 476                         }
 477                 }
 478         }
 479
 480         if ( !bNeedEnd )
 481         {
 482                 if ( pend_tag )
 483                         *pend_tag = end_tag;
 484                 else if ( end_tag )
 485                 {
 486                         delete e;
 487                         printf ( "syntax error: end tag '%s' not expected\n", token.c_str() );
 488                         return NULL;
 489                 }
 490                 return e;
 491         }
 492         bool bThisMixingErrorReported = false;
 493         while ( f.more_tokens() )
 494         {
 495                 if ( f.next_is_text() )
 496                 {
 497                         if ( !f.get_token ( token ) || !token.size() )
 498                         {
 499                                 printf ( "internal tool error - get_token() failed when more_tokens() returned true\n" );
 500                                 break;
 501                         }
 502                         if ( e->subElements.size() && !bThisMixingErrorReported )
 503                         {
 504                                 printf ( "syntax error: mixing of inner text with sub elements\n" );
 505                                 bThisMixingErrorReported = true;
 506                         }
 507                         if ( e->value.size() )
 508                         {
 509                                 printf ( "syntax error: multiple instances of inner text\n" );
 510                                 e->value += " " + token;
 511                         }
 512                         else
 513                                 e->value = token;
 514                 }
 515                 else
 516                 {
 517                         XMLElement* e2 = XMLParse ( f, path, &end_tag );
 518                         if ( end_tag )
 519                         {
 520                                 if ( e->name != e2->name )
 521                                         printf ( "end tag name mismatch\n" );
 522                                 delete e2;
 523                                 break;
 524                         }
 525                         if ( e->value.size() && !bThisMixingErrorReported )
 526                         {
 527                                 printf ( "syntax error: mixing of inner text with sub elements\n" );
 528                                 bThisMixingErrorReported = true;
 529                         }
 530                         e->AddSubElement ( e2 );
 531                 }
 532         }
 533         return e;
 534 }