reactos/tools/rbuild/XML.cpp

   1 // XML.cpp
   2
   3 #include "pch.h"
   4
   5 #include <direct.h>
   6 #include <io.h>
   7 #include <assert.h>
   8
   9 #include "XML.h"
  10 #include "exception.h"
  11 #include "ssprintf.h"
  12
  13 using std::string;
  14 using std::vector;
  15
  16 #ifdef WIN32
  17 #define getcwd _getcwd
  18 #endif//WIN32
  19
  20 static const char* WS = " \t\r\n";
  21 static const char* WSEQ = " =\t\r\n";
  22
  23 string working_directory;
  24
  25 void
  26 InitWorkingDirectory()
  27 {
  28         // store the current directory for path calculations
  29         working_directory.resize ( _MAX_PATH );
  30         working_directory[0] = 0;
  31         getcwd ( &working_directory[0], working_directory.size() );
  32         working_directory.resize ( strlen ( working_directory.c_str() ) );
  33 }
  34
  35 #ifdef _MSC_VER
  36 unsigned __int64
  37 #else
  38 unsigned long long
  39 #endif
  40 filelen ( FILE* f )
  41 {
  42 #ifdef WIN32
  43         return _filelengthi64 ( _fileno(f) );
  44 #elif defined(UNIX)
  45         struct stat64 file_stat;
  46         if ( fstat64(fileno(f), &file_stat) != 0 )
  47                 return 0;
  48         return file_stat.st_size;
  49 #endif
  50 }
  51
  52 Path::Path()
  53 {
  54         if ( !working_directory.size() )
  55                 InitWorkingDirectory();
  56         string s ( working_directory );
  57         const char* p = strtok ( &s[0], "/\\" );
  58         while ( p )
  59         {
  60                 if ( *p )
  61                         path.push_back ( p );
  62                 p = strtok ( NULL, "/\\" );
  63         }
  64 }
  65
  66 Path::Path ( const Path& cwd, const string& file )
  67 {
  68         string s ( cwd.Fixup ( file, false ) );
  69         const char* p = strtok ( &s[0], "/\\" );
  70         while ( p )
  71         {
  72                 if ( *p )
  73                         path.push_back ( p );
  74                 p = strtok ( NULL, "/\\" );
  75         }
  76 }
  77
  78 string
  79 Path::Fixup ( const string& file, bool include_filename ) const
  80 {
  81         if ( strchr ( "/\\", file[0] )
  82 #ifdef WIN32
  83                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
  84                 || file[1] == ':'
  85 #endif//WIN32
  86                 )
  87         {
  88                 return file;
  89         }
  90         vector<string> pathtmp ( path );
  91         string tmp ( file );
  92         const char* prev = strtok ( &tmp[0], "/\\" );
  93         const char* p = strtok ( NULL, "/\\" );
  94         while ( p )
  95         {
  96                 if ( !strcmp ( prev, "." ) )
  97                         ; // do nothing
  98                 else if ( !strcmp ( prev, ".." ) )
  99                 {
 100                         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 101 #ifdef WIN32
 102                         if ( pathtmp.size() > 1 )
 103 #else
 104                         if ( pathtmp.size() )
 105 #endif
 106                                 pathtmp.resize ( pathtmp.size() - 1 );
 107                 }
 108                 else
 109                         pathtmp.push_back ( prev );
 110                 prev = p;
 111                 p = strtok ( NULL, "/\\" );
 112         }
 113         if ( include_filename )
 114                 pathtmp.push_back ( prev );
 115
 116         // reuse tmp variable to return recombined path
 117         tmp.resize(0);
 118         for ( size_t i = 0; i < pathtmp.size(); i++ )
 119         {
 120                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 121 #ifdef WIN32
 122                 if ( i ) tmp += "/";
 123 #else
 124                 tmp += "/";
 125 #endif
 126                 tmp += pathtmp[i];
 127         }
 128         return tmp;
 129 }
 130
 131 /*static*/ string
 132 Path::RelativeFromWorkingDirectory ( const string& path )
 133 {
 134         vector<string> vwork, vpath, vout;
 135         Path::Split ( vwork, working_directory, true );
 136         Path::Split ( vpath, path, true );
 137 #ifdef WIN32
 138         // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 139         // not possible to do relative across different drive letters
 140         if ( vwork[0] != vpath[0] )
 141                 return path;
 142 #endif
 143         size_t i = 0;
 144         while ( i < vwork.size() && i < vpath.size() && vwork[i] == vpath[i] )
 145                 ++i;
 146         if ( i < vwork.size() )
 147         {
 148                 // path goes above our working directory, we will need some ..'s
 149                 for ( size_t j = 0; j < i; j++ )
 150                         vout.push_back ( ".." );
 151         }
 152         while ( i < vpath.size() )
 153                 vout.push_back ( vpath[i++] );
 154
 155         // now merge vout into a string again
 156         string out;
 157         for ( i = 0; i < vout.size(); i++ )
 158         {
 159                 // this squirreliness is b/c win32 has drive letters and *nix doesn't...
 160 #ifdef WIN32
 161                 if ( i ) out += "/";
 162 #else
 163                 out += "/";
 164 #endif
 165                 out += vout[i];
 166         }
 167         return out;
 168 }
 169
 170 /*static*/ void
 171 Path::Split ( vector<string>& out,
 172               const string& path,
 173               bool include_last )
 174 {
 175         string s ( path );
 176         const char* prev = strtok ( &s[0], "/\\" );
 177         const char* p = strtok ( NULL, "/\\" );
 178         out.resize ( 0 );
 179         while ( p )
 180         {
 181                 out.push_back ( prev );
 182                 prev = p;
 183                 p = strtok ( NULL, "/\\" );
 184         }
 185         if ( include_last )
 186                 out.push_back ( prev );
 187 }
 188
 189 XMLFile::XMLFile()
 190 {
 191 }
 192
 193 void
 194 XMLFile::close()
 195 {
 196         _buf.resize(0);
 197         _p = _end = NULL;
 198 }
 199
 200 bool
 201 XMLFile::open(const string& filename_)
 202 {
 203         close();
 204         FILE* f = fopen ( filename_.c_str(), "rb" );
 205         if ( !f )
 206                 return false;
 207         unsigned long len = (unsigned long)filelen(f);
 208         _buf.resize ( len );
 209         fread ( &_buf[0], 1, len, f );
 210         fclose ( f );
 211         _p = _buf.c_str();
 212         _end = _p + len;
 213         _filename = filename_;
 214         next_token();
 215         return true;
 216 }
 217
 218 // next_token() moves the pointer to next token, which may be
 219 // an xml element or a text element, basically it's a glorified
 220 // skipspace, normally the user of this class won't need to call
 221 // this function
 222 void
 223 XMLFile::next_token()
 224 {
 225         _p += strspn ( _p, WS );
 226 }
 227
 228 bool
 229 XMLFile::next_is_text()
 230 {
 231         return *_p != '<';
 232 }
 233
 234 bool
 235 XMLFile::more_tokens()
 236 {
 237         return _p != _end;
 238 }
 239
 240 // get_token() is used to return a token, and move the pointer
 241 // past the token
 242 bool
 243 XMLFile::get_token(string& token)
 244 {
 245         const char* tokend;
 246         if ( !strncmp ( _p, "<!--", 4 ) )
 247         {
 248                 tokend = strstr ( _p, "-->" );
 249                 if ( !tokend )
 250                         tokend = _end;
 251                 else
 252                         tokend += 3;
 253         }
 254         else if ( !strncmp ( _p, "<?", 2 ) )
 255         {
 256                 tokend = strstr ( _p, "?>" );
 257                 if ( !tokend )
 258                         tokend = _end;
 259                 else
 260                         tokend += 2;
 261         }
 262         else if ( *_p == '<' )
 263         {
 264                 tokend = strchr ( _p, '>' );
 265                 if ( !tokend )
 266                         tokend = _end;
 267                 else
 268                         ++tokend;
 269         }
 270         else
 271         {
 272                 tokend = strchr ( _p, '<' );
 273                 if ( !tokend )
 274                         tokend = _end;
 275                 while ( tokend > _p && isspace(tokend[-1]) )
 276                         --tokend;
 277         }
 278         if ( tokend == _p )
 279                 return false;
 280         token = string ( _p, tokend-_p );
 281         _p = tokend;
 282         next_token();
 283         return true;
 284 }
 285
 286 string
 287 XMLFile::Location() const
 288 {
 289         int line = 1;
 290         const char* p = strchr ( _buf.c_str(), '\n' );
 291         while ( p && p < _p )
 292         {
 293                 ++line;
 294                 p = strchr ( p+1, '\n' );
 295         }
 296         return ssprintf ( "%s(%i)",_filename.c_str(), line );
 297 }
 298
 299 XMLAttribute::XMLAttribute()
 300 {
 301 }
 302
 303 XMLAttribute::XMLAttribute(const string& name_,
 304                            const string& value_)
 305         : name(name_), value(value_)
 306 {
 307 }
 308
 309 XMLElement::XMLElement()
 310         : parentElement(NULL)
 311 {
 312 }
 313
 314 XMLElement::~XMLElement()
 315 {
 316         size_t i;
 317         for ( i = 0; i < attributes.size(); i++ )
 318                 delete attributes[i];
 319         for ( i = 0; i < subElements.size(); i++ )
 320                 delete subElements[i];
 321 }
 322
 323 void
 324 XMLElement::AddSubElement ( XMLElement* e )
 325 {
 326         subElements.push_back ( e );
 327         e->parentElement = this;
 328 }
 329
 330 // Parse()
 331 // This function takes a single xml tag ( i.e. beginning with '<' and
 332 // ending with '>', and parses out it's tag name and constituent
 333 // attributes.
 334 // Return Value: returns true if you need to look for a </tag> for
 335 // the one it just parsed...
 336 bool
 337 XMLElement::Parse(const string& token,
 338                   bool& end_tag)
 339 {
 340         const char* p = token.c_str();
 341         assert ( *p == '<' );
 342         ++p;
 343         p += strspn ( p, WS );
 344
 345         // check if this is a comment
 346         if ( !strncmp ( p, "!--", 3 ) )
 347         {
 348                 name = "!--";
 349                 end_tag = false;
 350                 return false; // never look for end tag to a comment
 351         }
 352
 353         end_tag = ( *p == '/' );
 354         if ( end_tag )
 355         {
 356                 ++p;
 357                 p += strspn ( p, WS );
 358         }
 359         const char* end = strpbrk ( p, WS );
 360         if ( !end )
 361         {
 362                 end = strpbrk ( p, "/>" );
 363                 assert ( end );
 364         }
 365         name = string ( p, end-p );
 366         p = end;
 367         p += strspn ( p, WS );
 368         while ( *p != '>' && *p != '/' )
 369         {
 370                 end = strpbrk ( p, WSEQ );
 371                 if ( !end )
 372                 {
 373                         end = strpbrk ( p, "/>" );
 374                         assert ( end );
 375                 }
 376                 string attribute ( p, end-p ), value;
 377                 p = end;
 378                 p += strspn ( p, WS );
 379                 if ( *p == '=' )
 380                 {
 381                         ++p;
 382                         p += strspn ( p, WS );
 383                         char quote = 0;
 384                         if ( strchr ( "\"'", *p ) )
 385                         {
 386                                 quote = *p++;
 387                                 end = strchr ( p, quote );
 388                         }
 389                         else
 390                         {
 391                                 end = strpbrk ( p, WS );
 392                         }
 393                         if ( !end )
 394                         {
 395                                 end = strchr ( p, '>' );
 396                                 assert(end);
 397                                 if ( end[-1] == '/' )
 398                                         end--;
 399                         }
 400                         value = string ( p, end-p );
 401                         p = end;
 402                         if ( quote && *p == quote )
 403                                 p++;
 404                         p += strspn ( p, WS );
 405                 }
 406                 attributes.push_back ( new XMLAttribute ( attribute, value ) );
 407         }
 408         return !( *p == '/' ) && !end_tag;
 409 }
 410
 411 XMLAttribute*
 412 XMLElement::GetAttribute ( const string& attribute,
 413                            bool required )
 414 {
 415         // this would be faster with a tree-based container, but our attribute
 416         // lists are likely to stay so short as to not be an issue.
 417         for ( size_t i = 0; i < attributes.size(); i++ )
 418         {
 419                 if ( attribute == attributes[i]->name )
 420                         return attributes[i];
 421         }
 422         if ( required )
 423         {
 424                 throw RequiredAttributeNotFoundException ( attribute,
 425                                                            name );
 426         }
 427         return NULL;
 428 }
 429
 430 const XMLAttribute*
 431 XMLElement::GetAttribute ( const string& attribute,
 432                            bool required ) const
 433 {
 434         // this would be faster with a tree-based container, but our attribute
 435         // lists are likely to stay so short as to not be an issue.
 436         for ( size_t i = 0; i < attributes.size(); i++ )
 437         {
 438                 if ( attribute == attributes[i]->name )
 439                         return attributes[i];
 440         }
 441         if ( required )
 442         {
 443                 throw RequiredAttributeNotFoundException ( attribute,
 444                                                            name );
 445         }
 446         return NULL;
 447 }
 448
 449 // XMLParse()
 450 // This function reads a "token" from the file loaded in XMLFile
 451 // REM TODO FIXME: At the moment it can't handle comments or non-xml tags.
 452 // if it finds a tag that is non-singular, it parses sub-elements and/or
 453 // inner text into the XMLElement that it is building to return.
 454 // Return Value: an XMLElement allocated via the new operator that contains
 455 // it's parsed data. Keep calling this function until it returns NULL
 456 // (no more data)
 457 XMLElement*
 458 XMLParse(XMLFile& f,
 459          const Path& path,
 460          bool* pend_tag /*= NULL*/)
 461 {
 462         string token;
 463         if ( !f.get_token(token) )
 464                 return NULL;
 465         bool end_tag;
 466
 467         while ( token[0] != '<'
 468                 || !strncmp ( token.c_str(), "<!--", 4 )
 469                 || !strncmp ( token.c_str(), "<?", 2 ) )
 470         {
 471                 if ( token[0] != '<' )
 472                         throw XMLSyntaxErrorException ( f.Location(),
 473                                                         "expecting xml tag, not '%s'",
 474                                                         token.c_str() );
 475                 if ( !f.get_token(token) )
 476                         return NULL;
 477         }
 478
 479         XMLElement* e = new XMLElement;
 480         bool bNeedEnd = e->Parse ( token, end_tag );
 481
 482         if ( e->name == "xi:include" )
 483         {
 484                 XMLAttribute* att;
 485                 att = e->GetAttribute("href",true);
 486                 assert(att);
 487
 488                 string file ( path.Fixup(att->value,true) );
 489                 string top_file ( Path::RelativeFromWorkingDirectory ( file ) );
 490                 e->attributes.push_back ( new XMLAttribute ( "top_href", top_file ) );
 491                 XMLFile fInc;
 492                 if ( !fInc.open ( file ) )
 493                         throw FileNotFoundException (
 494                                 ssprintf("%s (referenced from %s)",
 495                                         file.c_str(),
 496                                         f.Location().c_str() ) );
 497                 else
 498                 {
 499                         Path path2 ( path, att->value );
 500                         for ( ;; )
 501                         {
 502                                 XMLElement* e2 = XMLParse ( fInc, path2 );
 503                                 if ( !e2 )
 504                                         break;
 505                                 e->AddSubElement ( e2 );
 506                         }
 507                 }
 508         }
 509
 510         if ( !bNeedEnd )
 511         {
 512                 if ( pend_tag )
 513                         *pend_tag = end_tag;
 514                 else if ( end_tag )
 515                 {
 516                         delete e;
 517                         throw XMLSyntaxErrorException ( f.Location(),
 518                                                         "end tag '%s' not expected",
 519                                                         token.c_str() );
 520                         return NULL;
 521                 }
 522                 return e;
 523         }
 524         bool bThisMixingErrorReported = false;
 525         while ( f.more_tokens() )
 526         {
 527                 if ( f.next_is_text() )
 528                 {
 529                         if ( !f.get_token ( token ) || !token.size() )
 530                         {
 531                                 throw Exception ( "internal tool error - get_token() failed when more_tokens() returned true" );
 532                                 break;
 533                         }
 534                         if ( e->subElements.size() && !bThisMixingErrorReported )
 535                         {
 536                                 throw XMLSyntaxErrorException ( f.Location(),
 537                                                                 "mixing of inner text with sub elements" );
 538                                 bThisMixingErrorReported = true;
 539                         }
 540                         if ( strchr ( token.c_str(), '>' ) )
 541                         {
 542                                 throw XMLSyntaxErrorException ( f.Location(),
 543                                                                 "invalid symbol '>'" );
 544                         }
 545                         if ( e->value.size() )
 546                         {
 547                                 throw XMLSyntaxErrorException ( f.Location(),
 548                                                                 "multiple instances of inner text" );
 549                                 e->value += " " + token;
 550                         }
 551                         else
 552                                 e->value = token;
 553                 }
 554                 else
 555                 {
 556                         XMLElement* e2 = XMLParse ( f, path, &end_tag );
 557                         if ( end_tag )
 558                         {
 559                                 if ( e->name != e2->name )
 560                                         throw XMLSyntaxErrorException ( f.Location(),
 561                                                                         "end tag name mismatch" );
 562                                 delete e2;
 563                                 break;
 564                         }
 565                         if ( e->value.size() && !bThisMixingErrorReported )
 566                         {
 567                                 throw XMLSyntaxErrorException ( f.Location(),
 568                                                                 "mixing of inner text with sub elements" );
 569                                 bThisMixingErrorReported = true;
 570                         }
 571                         e->AddSubElement ( e2 );
 572                 }
 573         }
 574         return e;
 575 }