reactos/tools/rbuild/XML.cpp

   1 // XML.cpp
   2
   3 #ifdef _MSC_VER
   4 #pragma warning ( disable : 4786 ) // identifier was truncated to '255' characters in the debug information
   5 #endif//_MSC_VER
   6
   7 #include <direct.h>
   8 #include <io.h>
   9 #include <assert.h>
  10
  11 #include "XML.h"
  12
  13 using std::string;
  14 using std::vector;
  15
  16 #ifdef WIN32
  17 #define getcwd _getcwd
  18 #endif//WIN32
  19
  20 static const char* WS = " \t\r\n";
  21 static const char* WSEQ = " =\t\r\n";
  22
  23 string working_directory;
  24
  25 void
  26 InitWorkingDirectory()
  27 {
  28         // store the current directory for path calculations
  29         working_directory.resize ( _MAX_PATH );
  30         working_directory[0] = 0;
  31         getcwd ( &working_directory[0], working_directory.size() );
  32         working_directory.resize ( strlen ( working_directory.c_str() ) );
  33 }
  34
  35 #ifdef _MSC_VER
  36 unsigned __int64
  37 #else
  38 unsigned long long
  39 #endif
  40 filelen ( FILE* f )
  41 {
  42 #ifdef WIN32
  43         return _filelengthi64 ( _fileno(f) );
  44 #elif defined(UNIX)
  45         struct stat64 file_stat;
  46         if ( fstat64(fileno(f), &file_stat) != 0 )
  47                 return 0;
  48         return file_stat.st_size;
  49 #endif
  50 }
  51
  52 Path::Path()
  53 {
  54         string s ( working_directory );
  55         const char* p = strtok ( &s[0], "/\\" );
  56         while ( p )
  57         {
  58                 if ( *p )
  59                         path.push_back ( p );
  60                 p = strtok ( NULL, "/\\" );
  61         }
  62 }
  63
  64 Path::Path ( const Path& cwd, const string& file )
  65 {
  66         string s ( cwd.Fixup ( file, false ) );
  67         const char* p = strtok ( &s[0], "/\\" );
  68         while ( p )
  69         {
  70                 if ( *p )
  71                         path.push_back ( p );
  72                 p = strtok ( NULL, "/\\" );
  73         }
  74 }
  75
  76 string
  77 Path::Fixup ( const string& file, bool include_filename ) const
  78 {
  79         if ( strchr ( "/\\", file[0] )
  80 #ifdef WIN32
  81                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
  82                 || file[1] == ':'
  83 #endif//WIN32
  84                 )
  85         {
  86                 return file;
  87         }
  88         vector<string> pathtmp ( path );
  89         string tmp ( file );
  90         const char* prev = strtok ( &tmp[0], "/\\" );
  91         const char* p = strtok ( NULL, "/\\" );
  92         while ( p )
  93         {
  94                 if ( !strcmp ( prev, "." ) )
  95                         ; // do nothing
  96                 else if ( !strcmp ( prev, ".." ) )
  97                 {
  98                         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
  99 #ifdef WIN32
 100                         if ( pathtmp.size() > 1 )
 101 #else
 102                         if ( pathtmp.size() )
 103 #endif
 104                                 pathtmp.resize ( pathtmp.size() - 1 );
 105                 }
 106                 else
 107                         pathtmp.push_back ( prev );
 108                 prev = p;
 109                 p = strtok ( NULL, "/\\" );
 110         }
 111         if ( include_filename )
 112                 pathtmp.push_back ( prev );
 113
 114         // reuse tmp variable to return recombined path
 115         tmp.resize(0);
 116         for ( size_t i = 0; i < pathtmp.size(); i++ )
 117         {
 118                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 119 #ifdef WIN32
 120                 if ( i ) tmp += "/";
 121 #else
 122                 tmp += "/";
 123 #endif
 124                 tmp += pathtmp[i];
 125         }
 126         return tmp;
 127 }
 128
 129 /*static*/ string
 130 Path::RelativeFromWorkingDirectory ( const string& path )
 131 {
 132         vector<string> vwork, vpath, vout;
 133         Path::Split ( vwork, working_directory, true );
 134         Path::Split ( vpath, path, true );
 135 #ifdef WIN32
 136         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 137         // not possible to do relative across different drive letters
 138         if ( vwork[0] != vpath[0] )
 139                 return path;
 140 #endif
 141         size_t i = 0;
 142         while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
 143                 ++i;
 144         if ( i < vwork.size() )
 145         {
 146                 // path goes above our working directory, we will need some ..'s
 147                 for ( size_t j = 0; j < i; j++ )
 148                         vout.push_back ( ".." );
 149         }
 150         while ( i < vpath.size() )
 151                 vout.push_back ( vpath[i++] );
 152
 153         // now merge vout into a string again
 154         string out;
 155         for ( i = 0; i < vout.size(); i++ )
 156         {
 157                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 158 #ifdef WIN32
 159                 if ( i ) out += "/";
 160 #else
 161                 out += "/";
 162 #endif
 163                 out += vout[i];
 164         }
 165         return out;
 166 }
 167
 168 /*static*/ void
 169 Path::Split ( vector<string>& out,
 170               const string& path,
 171               bool include_last )
 172 {
 173         string s ( path );
 174         const char* prev = strtok ( &s[0], "/\\" );
 175         const char* p = strtok ( NULL, "/\\" );
 176         out.resize ( 0 );
 177         while ( p )
 178         {
 179                 out.push_back ( prev );
 180                 prev = p;
 181                 p = strtok ( NULL, "/\\" );
 182         }
 183         if ( include_last )
 184                 out.push_back ( prev );
 185 }
 186
 187 XMLFile::XMLFile()
 188 {
 189 }
 190
 191 void
 192 XMLFile::close()
 193 {
 194         while ( _f.size() )
 195         {
 196                 fclose ( _f.back() );
 197                 _f.pop_back();
 198         }
 199         _buf.resize(0);
 200         _p = _end = NULL;
 201 }
 202
 203 bool
 204 XMLFile::open(const string& filename)
 205 {
 206         close();
 207         FILE* f = fopen ( filename.c_str(), "rb" );
 208         if ( !f )
 209                 return false;
 210         unsigned long len = (unsigned long)filelen(f);
 211         _buf.resize ( len );
 212         fread ( &_buf[0], 1, len, f );
 213         _p = _buf.c_str();
 214         _end = _p + len;
 215         _f.push_back ( f );
 216         next_token();
 217         return true;
 218 }
 219
 220 // next_token() moves the pointer to next token, which may be
 221 // an xml element or a text element, basically it's a glorified
 222 // skipspace, normally the user of this class won't need to call
 223 // this function
 224 void
 225 XMLFile::next_token()
 226 {
 227         _p += strspn ( _p, WS );
 228 }
 229
 230 bool
 231 XMLFile::next_is_text()
 232 {
 233         return *_p != '<';
 234 }
 235
 236 bool
 237 XMLFile::more_tokens()
 238 {
 239         return _p != _end;
 240 }
 241
 242 // get_token() is used to return a token, and move the pointer
 243 // past the token
 244 bool
 245 XMLFile::get_token(string& token)
 246 {
 247         const char* tokend;
 248         if ( !strncmp ( _p, "<!--", 4 ) )
 249         {
 250                 tokend = strstr ( _p, "-->" );
 251                 if ( !tokend )
 252                         tokend = _end;
 253                 else
 254                         tokend += 3;
 255         }
 256         else if ( *_p == '<' )
 257         {
 258                 tokend = strchr ( _p, '>' );
 259                 if ( !tokend )
 260                         tokend = _end;
 261                 else
 262                         ++tokend;
 263         }
 264         else
 265         {
 266                 tokend = strchr ( _p, '<' );
 267                 if ( !tokend )
 268                         tokend = _end;
 269                 while ( tokend > _p && isspace(tokend[-1]) )
 270                         --tokend;
 271         }
 272         if ( tokend == _p )
 273                 return false;
 274         token = string ( _p, tokend-_p );
 275         _p = tokend;
 276         next_token();
 277         return true;
 278 }
 279
 280 XMLAttribute::XMLAttribute()
 281 {
 282 }
 283
 284 XMLAttribute::XMLAttribute(const string& name_,
 285                            const string& value_)
 286         : name(name_), value(value_)
 287 {
 288 }
 289
 290 XMLElement::XMLElement()
 291         : parentElement(NULL)
 292 {
 293 }
 294
 295 XMLElement::~XMLElement()
 296 {
 297         size_t i;
 298         for ( i = 0; i < attributes.size(); i++ )
 299                 delete attributes[i];
 300         for ( i = 0; i < subElements.size(); i++ )
 301                 delete subElements[i];
 302 }
 303
 304 void
 305 XMLElement::AddSubElement ( XMLElement* e )
 306 {
 307         subElements.push_back ( e );
 308         e->parentElement = this;
 309 }
 310
 311 // Parse()
 312 // This function takes a single xml tag ( i.e. beginning with '<' and
 313 // ending with '>', and parses out it's tag name and constituent
 314 // attributes.
 315 // Return Value: returns true if you need to look for a </tag> for
 316 // the one it just parsed...
 317 bool
 318 XMLElement::Parse(const string& token,
 319                   bool& end_tag)
 320 {
 321         const char* p = token.c_str();
 322         assert ( *p == '<' );
 323         ++p;
 324         p += strspn ( p, WS );
 325
 326         // check if this is a comment
 327         if ( !strncmp ( p, "!--", 3 ) )
 328         {
 329                 name = "!--";
 330                 end_tag = false;
 331                 return false; // never look for end tag to a comment
 332         }
 333
 334         end_tag = ( *p == '/' );
 335         if ( end_tag )
 336         {
 337                 ++p;
 338                 p += strspn ( p, WS );
 339         }
 340         const char* end = strpbrk ( p, WS );
 341         if ( !end )
 342         {
 343                 end = strpbrk ( p, "/>" );
 344                 assert ( end );
 345         }
 346         name = string ( p, end-p );
 347         p = end;
 348         p += strspn ( p, WS );
 349         while ( *p != '>' && *p != '/' )
 350         {
 351                 end = strpbrk ( p, WSEQ );
 352                 if ( !end )
 353                 {
 354                         end = strpbrk ( p, "/>" );
 355                         assert ( end );
 356                 }
 357                 string attribute ( p, end-p ), value;
 358                 p = end;
 359                 p += strspn ( p, WS );
 360                 if ( *p == '=' )
 361                 {
 362                         ++p;
 363                         p += strspn ( p, WS );
 364                         char quote = 0;
 365                         if ( strchr ( "\"'", *p ) )
 366                         {
 367                                 quote = *p++;
 368                                 end = strchr ( p, quote );
 369                         }
 370                         else
 371                         {
 372                                 end = strpbrk ( p, WS );
 373                         }
 374                         if ( !end )
 375                         {
 376                                 end = strchr ( p, '>' );
 377                                 assert(end);
 378                                 if ( end[-1] == '/' )
 379                                         end--;
 380                         }
 381                         value = string ( p, end-p );
 382                         p = end;
 383                         if ( quote && *p == quote )
 384                                 p++;
 385                         p += strspn ( p, WS );
 386                 }
 387                 attributes.push_back ( new XMLAttribute ( attribute, value ) );
 388         }
 389         return !( *p == '/' ) && !end_tag;
 390 }
 391
 392 XMLAttribute*
 393 XMLElement::GetAttribute ( const string& attribute,
 394                            bool required )
 395 {
 396         // this would be faster with a tree-based container, but our attribute
 397         // lists are likely to stay so short as to not be an issue.
 398         for ( size_t i = 0; i < attributes.size(); i++ )
 399         {
 400                 if ( attribute == attributes[i]->name )
 401                         return attributes[i];
 402         }
 403         if ( required )
 404         {
 405                 printf ( "syntax error: attribute '%s' required for <%s>\n",
 406                         attribute.c_str(), name.c_str() );
 407         }
 408         return NULL;
 409 }
 410
 411 const XMLAttribute*
 412 XMLElement::GetAttribute ( const string& attribute,
 413                            bool required ) const
 414 {
 415         // this would be faster with a tree-based container, but our attribute
 416         // lists are likely to stay so short as to not be an issue.
 417         for ( size_t i = 0; i < attributes.size(); i++ )
 418         {
 419                 if ( attribute == attributes[i]->name )
 420                         return attributes[i];
 421         }
 422         if ( required )
 423         {
 424                 printf ( "syntax error: attribute '%s' required for <%s>\n",
 425                         attribute.c_str(), name.c_str() );
 426         }
 427         return NULL;
 428 }
 429
 430 // XMLParse()
 431 // This function reads a "token" from the file loaded in XMLFile
 432 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
 433 // if it finds a tag that is non-singular, it parses sub-elements and/or
 434 // inner text into the XMLElement that it is building to return.
 435 // Return Value: an XMLElement allocated via the new operator that contains
 436 // it's parsed data. Keep calling this function until it returns NULL
 437 // (no more data)
 438 XMLElement*
 439 XMLParse(XMLFile& f,
 440          const Path& path,
 441          bool* pend_tag /*= NULL*/)
 442 {
 443         string token;
 444         if ( !f.get_token(token) )
 445                 return NULL;
 446         bool end_tag;
 447
 448         while ( token[0] != '<' )
 449         {
 450                 printf ( "syntax error: expecting xml tag, not '%s'\n", token.c_str() );
 451                 if ( !f.get_token(token) )
 452                         return NULL;
 453         }
 454
 455         XMLElement* e = new XMLElement;
 456         bool bNeedEnd = e->Parse ( token, end_tag );
 457
 458         if ( e->name == "xi:include" )
 459         {
 460                 XMLAttribute* att;
 461                 att = e->GetAttribute("href",true);
 462                 if ( att )
 463                 {
 464                         string file ( path.Fixup(att->value,true) );
 465                         string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
 466                         e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
 467                         XMLFile fInc;
 468                         if ( !fInc.open ( file ) )
 469                                 printf ( "xi:include error, couldn't find file '%s'\n", file.c_str() );
 470                         else
 471                         {
 472                                 Path path2 ( path, att->value );
 473                                 for ( ;; )
 474                                 {
 475                                         XMLElement* e2 = XMLParse ( fInc, path2 );
 476                                         if ( !e2 )
 477                                                 break;
 478                                         e->AddSubElement ( e2 );
 479                                 }
 480                         }
 481                 }
 482         }
 483
 484         if ( !bNeedEnd )
 485         {
 486                 if ( pend_tag )
 487                         *pend_tag = end_tag;
 488                 else if ( end_tag )
 489                 {
 490                         delete e;
 491                         printf ( "syntax error: end tag '%s' not expected\n", token.c_str() );
 492                         return NULL;
 493                 }
 494                 return e;
 495         }
 496         bool bThisMixingErrorReported = false;
 497         while ( f.more_tokens() )
 498         {
 499                 if ( f.next_is_text() )
 500                 {
 501                         if ( !f.get_token ( token ) || !token.size() )
 502                         {
 503                                 printf ( "internal tool error - get_token() failed when more_tokens() returned true\n" );
 504                                 break;
 505                         }
 506                         if ( e->subElements.size() && !bThisMixingErrorReported )
 507                         {
 508                                 printf ( "syntax error: mixing of inner text with sub elements\n" );
 509                                 bThisMixingErrorReported = true;
 510                         }
 511                         if ( e->value.size() )
 512                         {
 513                                 printf ( "syntax error: multiple instances of inner text\n" );
 514                                 e->value += " " + token;
 515                         }
 516                         else
 517                                 e->value = token;
 518                 }
 519                 else
 520                 {
 521                         XMLElement* e2 = XMLParse ( f, path, &end_tag );
 522                         if ( end_tag )
 523                         {
 524                                 if ( e->name != e2->name )
 525                                         printf ( "end tag name mismatch\n" );
 526                                 delete e2;
 527                                 break;
 528                         }
 529                         if ( e->value.size() && !bThisMixingErrorReported )
 530                         {
 531                                 printf ( "syntax error: mixing of inner text with sub elements\n" );
 532                                 bThisMixingErrorReported = true;
 533                         }
 534                         e->AddSubElement ( e2 );
 535                 }
 536         }
 537         return e;
 538 }