4 #pragma warning ( disable : 4786 )
7 #define WIN32_LEAN_AND_MEAN
14 #include "EnumFilesImpl.h"
18 #include "binary2cstr.h"
19 #include "strip_comments.h"
22 #include "iskeyword.h"
26 #define TOKASSERT(x) \
29 printf("ASSERT FAILURE: (%s) at %s:%i\n", #x, __FILE__, __LINE__);\
30 printf("WHILE PROCESSING: \n");\
31 for ( int ajf83pfj = 0; ajf83pfj < tokens.size(); ajf83pfj++ )\
32 printf("%s ", tokens[ajf83pfj].c_str() );\
// Global list of every Header object created by import_file().
// import_file() and the "#include" handling in process_preprocessor()
// both scan this list by filename to find headers that already exist.
39 vector
<Header
*> headers
;
// Forward declarations for the parser routines defined later in this file.

// Imports one header file: looks for `filename` among the existing `headers`,
// loads the file into a string, strips comments, creates a Header for it and
// feeds each element to process_preprocessor() or process_c().
// NOTE(review): the meaning of the bool result is not visible here - confirm
// against the definition.
41 bool import_file ( const char* filename
);
// Returns a pointer that import_file() uses as the end of the C element that
// starts at `p` (the element is built as string(p, end-p)).
// `externc` is an output flag tied to the 'extern "C"' special case;
// callers initialise it to false before the call.
42 char* findend ( char* p
, bool& externc
);
// Classifies the construct that starts at tokens[off] and returns its Type
// (for example T_IGNORED_STATEMENT or T_FUNCTION_PTR). A leading __declspec
// is skipped first via skip_declspec().
43 Type
identify ( const vector
<string
>& tokens
, int off
= 0 );
// Tokenizes `element`, classifies it with identify() and runs parse_type()
// to fill `names` and `dependencies` (`dependencies` is emptied first).
// NOTE(review): `isTypedef` presumably reports a leading "typedef" token -
// the assignment is not visible here, confirm.
44 Type
process ( const string
& element
, vector
<string
>& names
, bool& isTypedef
, vector
<string
>& dependencies
);
// Handles one preprocessor directive (`element` must begin with '#') found in
// `filename`, updating header `h`: include, define, undef, if/ifdef/ifndef,
// elif, else, endif, include_next, pragma and error are recognised; anything
// else is reported with a "choked on" message.
45 void process_preprocessor ( const char* filename
, Header
& h
, const string
& element
);
// Creates a Symbol for one C element: stores the element text as its
// definition, fills type/names/dependencies via process(), copies the
// non-empty entries of h.ifs into the symbol and appends it to h.symbols.
46 void process_c ( Header
& h
, const string
& element
);
// Dispatches on `t` to the matching parse_*() routine below and returns that
// routine's result. Callers assign the result back to their token offset
// (off = parse_type(...)), i.e. it is used as the updated position in `tokens`.
47 int parse_type ( Type t
, const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// The parse_*() routines share one signature: they consume a construct that
// starts at tokens[off], record declared identifiers in `names` and referenced
// identifiers in `dependencies`, and return an int that parse_type() passes on
// to its caller.

// Ignored statement: scans forward to the terminating ";" token.
48 int parse_ignored_statement ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// "typedef_tident ( X )": records X as a name and "typedef_tident" as a
// dependency.
49 int parse_tident ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// Variable declaration: the first token is passed to depend(), the following
// tokens up to ";" are passed to name().
50 int parse_variable ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// struct/union definition: tokens ahead of the keyword become dependencies,
// the body is parsed member by member with identify()/parse_type() (member
// names go to a scratch vector), and trailing tokens before ";" are recorded
// as names or dependencies.
51 int parse_struct ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// Function declaration or definition: return-type tokens become dependencies,
// the function name is recorded, parameters go through parse_param(), an
// optional __attribute__ is checked for, then either ";" or a "{...}" body
// follows.
52 int parse_function ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// Function-pointer declaration: tokens before the first "(" and inside the
// first parenthesised group become dependencies except the last, which is the
// declared name; the parameter list is handled by parse_param().
53 int parse_function_ptr ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// "if" / "while" statement: expects the keyword followed by a parenthesised
// condition; a "{...}" body is parsed via identify()/parse_type().
54 int parse_ifwhile ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
// "do ... while ( ... ) ;" statement: the body is parsed via
// identify()/parse_type(), then the trailing while-condition and ";" are
// checked.
55 int parse_do ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
);
57 const char* libc_includes
[] =
70 bool is_libc_include ( const string
& inc
)
74 for ( int i
= 0; i
< sizeof(libc_includes
)/sizeof(libc_includes
[0]); i
++ )
76 if ( s
== libc_includes
[i
] )
82 BOOL
FileEnumProc ( PWIN32_FIND_DATA pwfd
, const char* filename
, long lParam
)
84 if ( !is_libc_include ( filename
) )
85 import_file ( filename
);
91 //import_file ( "coff.h" );
93 File
f ( "input.lst", "r" );
96 printf ( "Couldn't open \"input.lst\" for input\nPress any key to exit\n" );
101 while ( f
.next_line ( filename
, true ) )
102 import_file ( filename
.c_str() );
103 //printf ( "press any key to start\n" );
106 import_file ( "../test.h" );
108 EnumFilesInDirectory ( "c:/cvs/reactos/apps/utils/sdkparse/include", "*.h", FileEnumProc, 0, TRUE, FALSE );
110 printf ( "Done!\nPress any key to exit!\n" );
114 bool import_file ( const char* filename
)
118 for ( i
= 0; i
< headers
.size(); i
++ )
120 if ( headers
[i
]->filename
== filename
)
125 if ( !File::LoadIntoString ( s
, filename
) )
127 printf ( "Couldn't load \"%s\" for input.\n", filename
);
131 printf ( "%s\n", filename
);
133 // strip comments from the file...
134 strip_comments ( s
, true );
137 string no_comments ( filename );
138 no_comments += ".nocom.txt";
139 File::SaveFromString ( no_comments.c_str(), s, false );
142 Header
* h
= new Header ( filename
);
143 headers
.push_back ( h
);
152 // check for pre-processor command
155 char* end
= strchr ( p
, '\n' );
156 while ( end
&& end
[-1] == '\\' )
157 end
= strchr ( end
+1, '\n' );
160 string
element ( p
, end
-p
);
162 process_preprocessor ( filename
, *h
, element
);
166 else if ( *p
== '}' && h
->externc
)
171 if ( *p
== ';' ) p
++;
175 bool externc
= false;
176 char* end
= findend ( p
, externc
);
182 string
element ( p
, end
-p
);
184 process_c ( *h
, element
);
193 string
get_hdrguardtext ( const char* filename
)
195 string
s ( filename
);
198 while ( (p2
= strchr(p
, '\\')) )
200 while ( (p2
= strchr(p
,'/')) )
202 char* end
= strchr ( p
, '.' );
204 while ( (p2
= strchr(end
+1,'.')) )
206 string
hdrguardtext ( p
, end
-p
);
207 strupr ( &hdrguardtext
[0] );
211 void process_preprocessor ( const char* filename
, Header
& h
, const string
& element
)
213 string
hdrguardtext ( get_hdrguardtext ( filename
) );
215 const char* p
= &element
[0];
216 ASSERT ( *p
== '#' );
220 while ( iscsym(*end
) )
222 string
preproc ( p
, end
-p
);
226 const string dbg_filename
= "napi/lpc.h DISABLE DISABLE DISABLE";
228 if ( preproc
== "include" )
230 //if ( h.filename == "napi/lpc.h" )
232 ASSERT ( *p
== '<' || *p
== '\"' );
235 const char* end
= strpbrk ( p
, ">\"" );
238 while ( end
> p
&& isspace(end
[-1]) )
240 string
include_filename ( p
, end
-p
);
241 if ( is_libc_include ( include_filename
) )
242 h
.libc_includes
.push_back ( include_filename
);
246 for ( int i
= 0; i
< headers
.size() && !loaded
; i
++ )
248 if ( headers
[i
]->filename
== include_filename
)
250 if ( !headers
[i
]->done
)
252 printf ( "circular dependency between '%s' and '%s'\n", filename
, include_filename
.c_str() );
260 printf ( "(diverting to '%s')\n", include_filename
.c_str() );
261 import_file ( include_filename
.c_str() );
262 printf ( "(now back to '%s')\n", filename
);
264 h
.includes
.push_back ( include_filename
);
267 else if ( preproc
== "define" )
269 size_t len
= element
.size();
270 if ( strstr ( element
.c_str(), hdrguardtext
.c_str() )
271 && element
[len
-2] == '_'
272 && element
[len
-1] == 'H' )
274 // header include guard... ignore!
277 Symbol
*s
= new Symbol
;
284 while ( iscsym(*end
) )
287 s
->names
.push_back ( string(p
,end
-p
) );
289 s
->definition
= element
;
291 h
.symbols
.push_back ( s
);
293 else if ( preproc
== "undef" )
295 // safely ignorable for now, I think
297 else if ( preproc
== "if" || preproc
== "ifdef" || preproc
== "ifndef" )
299 if ( dbg_filename
== h
.filename
)
300 printf ( "(%s) PRE-PUSH preproc stack = %lu\n", preproc
.c_str(), h
.ifs
.size() );
301 size_t len
= element
.size();
302 // check for header include guard...
303 if ( strstr ( element
.c_str(), hdrguardtext
.c_str() )
304 && element
[len
-2] == '_'
305 && element
[len
-1] == 'H' )
306 h
.ifs
.push_back ( string("") );
308 h
.ifs
.push_back ( element
);
309 h
.ifspreproc
.push_back ( preproc
);
310 if ( dbg_filename
== h
.filename
)
311 printf ( "POST-PUSH preproc stack = %lu\n", h
.ifs
.size() );
313 else if ( preproc
== "endif" )
315 if ( dbg_filename
== h
.filename
)
316 printf ( "(%s) PRE-POP preproc stack = %lu\n", preproc
.c_str(), h
.ifs
.size() );
317 ASSERT ( h
.ifs
.size() > 0 && h
.ifs
.size() == h
.ifspreproc
.size() );
319 h
.ifspreproc
.pop_back();
320 if ( dbg_filename
== h
.filename
)
321 printf ( "POST-POP preproc stack = %lu\n", h
.ifs
.size() );
323 else if ( preproc
== "elif" )
325 if ( dbg_filename
== h
.filename
)
326 printf ( "(%s) PRE-PUSHPOP preproc stack = %lu\n", preproc
.c_str(), h
.ifs
.size() );
327 string
& oldpre
= h
.ifspreproc
.back();
328 string old
= h
.ifs
.back();
330 if ( oldpre
== "ifdef" )
331 condold
= string("!defined(") + old
+ ")";
332 else if ( oldpre
== "ifndef" )
333 condold
= string("defined(") + old
+ ")";
334 else if ( oldpre
== "if" )
335 condold
= string("!(") + old
+ ")";
338 printf ( "unrecognized preproc '%s'\n", oldpre
.c_str() );
342 h
.ifs
.back() = string("(") + element
+ ") && " + condold
;
343 h
.ifspreproc
.back() = "if";
344 if ( dbg_filename
== h
.filename
)
345 printf ( "POST-PUSHPOP preproc stack = %lu\n", h
.ifs
.size() );
347 else if ( preproc
== "else" )
349 if ( dbg_filename
== h
.filename
)
350 printf ( "(%s) PRE-PUSHPOP preproc stack = %lu\n", preproc
.c_str(), h
.ifs
.size() );
351 string
& oldpre
= h
.ifspreproc
.back();
352 ASSERT ( oldpre
!= "else" );
353 if ( oldpre
== "ifdef" )
354 h
.ifs
.back() = "ifndef";
355 else if ( oldpre
== "ifndef" )
356 h
.ifs
.back() = "ifdef";
357 else if ( oldpre
== "if" )
358 h
.ifs
.back() = string("!(") + h
.ifs
.back() + ")";
361 printf ( "unrecognized preproc '%s'\n", oldpre
.c_str() );
366 if ( dbg_filename
== h
.filename
)
367 printf ( "POST-PUSHPOP preproc stack = %lu\n", h
.ifs
.size() );
369 else if ( preproc
== "include_next" )
371 // we can safely ignore this command...
373 else if ( preproc
== "pragma" )
375 h
.pragmas
.push_back ( element
);
377 else if ( preproc
== "error" )
379 // FIXME - how to handle these
383 printf ( "process_preprocessor() choked on '%s'\n", preproc
.c_str() );
387 void process_c ( Header
& h
, const string
& element
)
389 //printf ( "\"%s\"\n\n", binary2cstr(element).c_str() );
393 Symbol
*s
= new Symbol
;
394 s
->definition
= element
;
395 s
->type
= process ( element
, s
->names
, isTypedef
, s
->dependencies
);
397 for ( int i
= 0; i
< h
.ifs
.size(); i
++ )
399 if ( h
.ifs
[i
].size() )
400 s
->ifs
.push_back ( h
.ifs
[i
] );
403 /*printf ( "names: " );
404 if ( s->names.size() )
406 printf ( "%s", s->names[0].c_str() );
407 for ( int i = 1; i < s->names.size(); i++ )
408 printf ( ", %s", s->names[i].c_str() );
414 printf ( "dependencies: " );
415 if ( s->dependencies.size() )
417 printf ( "%s", s->dependencies[0].c_str() );
418 for ( int i = 1; i < s->dependencies.size(); i++ )
419 printf ( ", %s", s->dependencies[i].c_str() );
425 h
.symbols
.push_back ( s
);
428 char* skipsemi ( char* p
)
430 if ( *p
!= '{' ) // }
437 char* s
= strchr ( p
, '{' );
438 char* e
= strchr ( p
, '}' );
443 // make sure we don't return pointer past null
453 char* findend ( char* p
, bool& externc
)
455 //if ( !strncmp ( p, "typedef struct _OSVERSIONINFOEXA : ", 35 ) )
457 // special-case for 'extern "C"'
458 if ( !strncmp ( p
, "extern", 6 ) )
462 if ( !strncmp ( p2
, "\"C\"", 3 ) )
473 // special-case for 'typedef_tident'
474 if ( !strncmp ( p
, "typedef_tident", 14 ) )
476 char* end
= strchr ( p
, ')' );
481 bool isStruct
= false;
483 char* end
= strchr ( p
, ';' );
488 char* semi
= strchr ( p
, '{' );
489 if ( !semi
|| semi
> end
)
491 end
= skipsemi ( semi
);
493 const char* structs
[] = { "struct", "enum", "class", "union" };
494 for ( int i
= 0; i
< sizeof(structs
)/sizeof(structs
[0]); i
++ )
496 char* pStruct
= strstr ( p
, structs
[i
] );
499 && !__iscsym(pStruct
[-1])
500 && !__iscsym(pStruct
[strlen(structs
[i
])]) )
502 // make sure there's at most one identifier followed
504 pStruct
+= strlen(structs
[i
]);
505 pStruct
= skip_ws ( pStruct
);
506 if ( __iscsymf(*pStruct
) )
508 while ( __iscsym(*pStruct
) )
510 pStruct
= skip_ws ( pStruct
);
512 // special exception - C++ classes & stuff
513 if ( *pStruct
== ':' )
515 pStruct
= skip_ws ( pStruct
+ 1 );
516 ASSERT ( !strncmp(pStruct
,"public",6) || !strncmp(pStruct
,"protected",9) || !strncmp(pStruct
,"private",7) );
518 while ( __iscsym(*pStruct
) )
520 pStruct
= skip_ws ( pStruct
);
521 // skip base-class-name:
522 ASSERT ( __iscsymf(*pStruct
) );
523 while ( __iscsym(*pStruct
) )
525 pStruct
= skip_ws ( pStruct
);
527 if ( *pStruct
== '{' )
535 end
= strchr ( end
, ';' );
543 char* p2
= skip_ws ( end
);
550 int skip_declspec ( const vector
<string
>& tokens
, int off
)
552 if ( tokens
[off
] == "__declspec" )
555 TOKASSERT ( tokens
[off
] == "(" );
560 if ( tokens
[off
] == "(" )
562 else if ( tokens
[off
] == ")" )
570 Type
identify ( const vector
<string
>& tokens
, int off
)
572 off
= skip_declspec ( tokens
, off
);
573 /*if ( tokens.size() > off+4 )
575 if ( tokens[off+4] == "PCONTROLDISPATCHER" )
578 /*if ( tokens.size() > off+1 )
580 if ( tokens[off+1] == "_OSVERSIONINFOEXA" )
583 if ( tokens
[off
] == "__asm__" )
584 return T_IGNORED_STATEMENT
;
585 else if ( tokens
[off
] == "return" )
586 return T_IGNORED_STATEMENT
;
587 else if ( tokens
[off
] == "typedef_tident" )
589 else if ( tokens
[off
] == "if" )
591 else if ( tokens
[off
] == "while" )
593 else if ( tokens
[off
] == "do" )
598 for ( int i
= off
; i
< tokens
.size(); i
++ )
600 if ( tokens
[i
] == "(" && !brackets
)
602 else if ( tokens
[i
] == ")" && !brackets
&& openparens
== 1 )
604 else if ( tokens
[i
] == "{" )
606 else if ( (tokens
[i
] == "struct" || tokens
[i
] == "union") && !openparens
)
608 for ( int j
= i
+ 1; j
< tokens
.size(); j
++ )
610 if ( tokens
[j
] == "{" )
612 else if ( tokens
[j
] == "(" || tokens
[j
] == ";" || tokens
[j
] == "*" )
616 else if ( tokens
[i
] == ";" )
618 else if ( tokens
[i
] == "__attribute__" )
621 if ( openparens
> 1 && closeparens
)
622 return T_FUNCTION_PTR
;
623 else if ( openparens
>= 1 )
628 Type
process ( const string
& element
, vector
<string
>& names
, bool& isTypedef
, vector
<string
>& dependencies
)
632 dependencies
.resize ( 0 );
634 vector
<string
> tokens
;
636 tokenize ( element
, tokens
);
638 // now let's do the classification...
640 if ( tokens
[i
] == "typedef" )
646 Type t
= identify ( tokens
, i
);
648 parse_type ( t
, tokens
, i
, names
, dependencies
);
653 int parse_type ( Type t
, const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
657 case T_IGNORED_STATEMENT
:
658 return parse_ignored_statement ( tokens
, off
, names
, dependencies
);
660 return parse_tident ( tokens
, off
, names
, dependencies
);
662 return parse_variable ( tokens
, off
, names
, dependencies
);
664 return parse_struct ( tokens
, off
, names
, dependencies
);
666 return parse_function ( tokens
, off
, names
, dependencies
);
668 return parse_function_ptr ( tokens
, off
, names
, dependencies
);
671 return parse_ifwhile ( tokens
, off
, names
, dependencies
);
673 return parse_do ( tokens
, off
, names
, dependencies
);
675 TOKASSERT(!"unidentified type in parse_type()");
680 void name ( const string
& ident
, vector
<string
>& names
)
682 if ( !__iscsymf ( ident
[0] ) )
684 if ( iskeyword ( ident
) )
686 for ( int i
= 0; i
< names
.size(); i
++ )
688 if ( names
[i
] == ident
)
691 names
.push_back ( ident
);
694 void depend ( const string
& ident
, vector
<string
>& dependencies
)
696 if ( !__iscsymf ( ident
[0] ) )
698 if ( iskeyword ( ident
) )
700 for ( int i
= 0; i
< dependencies
.size(); i
++ )
702 if ( dependencies
[i
] == ident
)
705 dependencies
.push_back ( ident
);
708 int parse_ignored_statement ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
711 while ( tokens
[off
] != ";" )
713 ASSERT ( tokens
[off
] == ";" );
717 int parse_tident ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
719 TOKASSERT ( tokens
[off
] == "typedef_tident" );
720 TOKASSERT ( tokens
[off
+1] == "(" && tokens
[off
+3] == ")" );
721 names
.push_back ( tokens
[off
+2] );
722 dependencies
.push_back ( "typedef_tident" );
726 int parse_variable ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
728 // NOTE - Test with bitfields, I think this code will actually handle them properly...
729 if ( tokens
[off
] == ";" )
731 depend ( tokens
[off
++], dependencies
);
732 int done
= tokens
.size();
733 while ( off
< tokens
.size() && tokens
[off
] != ";" )
734 name ( tokens
[off
++], names
);
735 TOKASSERT ( off
< tokens
.size() && tokens
[off
] == ";" );
739 int parse_struct ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
741 int done
= tokens
.size();
743 //if ( tokens[off+1] == "_LARGE_INTEGER" )
746 while ( off
< done
&& tokens
[off
] != "struct" && tokens
[off
] != "union" )
747 depend ( tokens
[off
++], dependencies
);
749 TOKASSERT ( tokens
[off
] == "struct" || tokens
[off
] == "union" );
750 if ( tokens
[off
] != "struct" && tokens
[off
] != "union" )
754 if ( tokens
[off
] != "{" )
755 name ( tokens
[off
++], names
);
757 if ( tokens
[off
] == ":" )
760 TOKASSERT ( tokens
[off
] == "public" || tokens
[off
] == "protected" || tokens
[off
] == "private" );
762 depend ( tokens
[off
++], dependencies
);
765 TOKASSERT ( tokens
[off
] == "{" );
768 // skip through body of struct - noting any dependencies
770 //if ( off >= done ) _CrtDbgBreak();
771 while ( off
< done
&& tokens
[off
] != "}" )
773 vector
<string
> fauxnames
;
774 Type t
= identify ( tokens
, off
);
775 off
= parse_type ( t
, tokens
, off
, fauxnames
, dependencies
);
776 //if ( off >= done ) _CrtDbgBreak();
779 // process any trailing dependencies/names...
780 while ( tokens
[off
] != ";" )
782 TOKASSERT ( off
+1 < done
);
783 if ( tokens
[off
+1] == "," || tokens
[off
+1] == ";" )
784 name ( tokens
[off
], names
);
786 depend ( tokens
[off
], dependencies
);
790 TOKASSERT ( tokens
[off
] == ";" );
796 int parse_param ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
798 if ( tokens
[off
] == ")" )
800 // special-case check for function pointer params
806 if ( tokens
[done
] == "," && parens
== 1 )
808 if ( tokens
[done
] == ")" )
815 if ( tokens
[done
] == "(" )
817 if ( tokens
[done
] == "*" && tokens
[done
-1] == "(" )
824 depend ( tokens
[off
++], dependencies
);
826 name ( tokens
[off
++], names
);
830 int parse_function ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
832 vector
<string
> fauxnames
;
834 off
= skip_declspec ( tokens
, off
);
836 while ( tokens
[off
+1] != "(" )
837 depend ( tokens
[off
++], dependencies
);
838 name ( tokens
[off
++], names
);
840 TOKASSERT ( tokens
[off
] == "(" );
842 while ( tokens
[off
] != ")" )
845 off
= parse_param ( tokens
, off
, fauxnames
, dependencies
);
846 TOKASSERT ( tokens
[off
] == "," || tokens
[off
] == ")" );
851 // check for "attributes"
852 if ( tokens
[off
] == "__attribute__" )
855 TOKASSERT ( tokens
[off
] == "(" );
860 if ( tokens
[off
] == "(" )
862 else if ( tokens
[off
] == ")" )
868 // is this just a function *declaration* ?
869 if ( tokens
[off
] == ";" )
872 // we have a function body...
873 TOKASSERT ( tokens
[off
] == "{" );
876 while ( tokens
[off
] != "}" )
878 Type t
= identify ( tokens
, off
);
879 if ( t
== T_VARIABLE
)
880 off
= parse_type ( t
, tokens
, off
, fauxnames
, dependencies
);
883 while ( tokens
[off
] != ";" )
885 TOKASSERT ( tokens
[off
] == ";" );
890 TOKASSERT ( tokens
[off
] == "}" );
896 int parse_function_ptr ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
898 off
= skip_declspec ( tokens
, off
);
900 while ( tokens
[off
] != "(" )
901 depend ( tokens
[off
++], dependencies
);
903 TOKASSERT ( tokens
[off
] == "(" );
906 while ( tokens
[off
+1] != ")" )
907 depend ( tokens
[off
++], dependencies
);
908 name ( tokens
[off
++], names
);
910 TOKASSERT ( tokens
[off
] == ")" );
914 TOKASSERT ( tokens
[off
] == "(" );
916 while ( tokens
[off
] != ")" )
919 vector
<string
> fauxnames
;
920 off
= parse_param ( tokens
, off
, fauxnames
, dependencies
);
921 TOKASSERT ( tokens
[off
] == "," || tokens
[off
] == ")" );
925 TOKASSERT ( tokens
[off
] == ";" );
930 int parse_ifwhile ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
932 TOKASSERT ( tokens
[off
] == "if" || tokens
[off
] == "while" );
935 TOKASSERT ( tokens
[off
] == "(" );
938 TOKASSERT ( tokens
[off
] != ")" );
939 while ( tokens
[off
] != ")" )
942 if ( tokens
[off
] == "{" )
944 while ( tokens
[off
] != "}" )
946 Type t
= identify ( tokens
, off
);
947 off
= parse_type ( t
, tokens
, off
, names
, dependencies
);
954 int parse_do ( const vector
<string
>& tokens
, int off
, vector
<string
>& names
, vector
<string
>& dependencies
)
956 TOKASSERT ( tokens
[off
] == "do" );
959 if ( tokens
[off
] != "{" )
961 Type t
= identify ( tokens
, off
);
962 off
= parse_type ( t
, tokens
, off
, names
, dependencies
);
966 while ( tokens
[off
] != "}" )
968 Type t
= identify ( tokens
, off
);
969 off
= parse_type ( t
, tokens
, off
, names
, dependencies
);
973 TOKASSERT ( tokens
[off
] == "while" );
976 TOKASSERT ( tokens
[off
] == "(" );
977 while ( tokens
[off
] != ")" )
980 TOKASSERT ( tokens
[off
] == ")" );
983 TOKASSERT ( tokens
[off
] == ";" );