1 # Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009
2 # David Turner <david@freetype.org>
5 # this file contains definitions of classes needed to decompose
6 # C source files into a series of multi-line "blocks". There are
9 # - normal blocks, which contain source code or ordinary comments
11 # - documentation blocks, which have restricted formatting, and
12 # whose text always starts with a documentation markup tag like
13 # "<Function>", "<Type>", etc..
15 # the routines used to process the content of documentation blocks
16 # are not contained here, but in "content.py"
18 # the classes and methods found here only deal with text parsing
19 # and basic documentation block extraction
22 import fileinput
, re
, sys
, os
, string
26 ################################################################
28 ## BLOCK FORMAT PATTERN
30 ## A simple class containing compiled regular expressions used
31 ## to detect potential documentation format block comments within
34 ## note that the 'column' pattern must contain a group that will
35 ## be used to "unbox" the content of documentation comment blocks
class SourceBlockFormat:
    """Compiled regular expressions describing one documentation block format.

    Attributes:
      id     -- the identifier passed by the caller for this format
      start  -- compiled pattern tested against the first line of a block
      column -- compiled pattern tested against the inner lines of a block;
                it must contain a group, which is used to "unbox" the
                content of the comment line
      end    -- compiled pattern tested against the last line of a block
    """

    def __init__( self, id, start, column, end ):
        """create a block pattern, used to recognize special documentation blocks

        id     -- identifier stored as-is on the instance
        start  -- regular expression source for the block's first line
        column -- regular expression source for the block's inner lines
        end    -- regular expression source for the block's last line

        The three expressions are compiled with re.VERBOSE, so whitespace
        and '#' comments inside them are ignored by the regex engine.
        """
        # keep the identifier around: it is read back through 'format.id'
        # when a block is dumped
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )
49 # format 1 documentation comment blocks look like the following:
51 # /************************************/
55 # /************************************/
57 # we define a few regular expressions here to detect them
61 \s* # any number of whitespace
62 /\*{2,}/ # followed by '/' and at least two asterisks then '/'
63 \s*$ # probably followed by whitespace
67 \s* # any number of whitespace
68 /\*{1} # followed by '/' and precisely one asterisk
69 ([^*].*) # followed by anything (group 1)
70 \*{1}/ # followed by one asterisk and a '/'
71 \s*$ # probably followed by whitespace
# a format 1 block is closed by the same kind of line that opens it,
# hence 'start' is passed a second time as the 'end' pattern
re_source_block_format1 = SourceBlockFormat( id     = 1,
                                             start  = start,
                                             column = column,
                                             end    = start )
78 # format 2 documentation comment blocks look like the following:
80 # /************************************ (at least 2 asterisks)
85 # **/ (1 or more asterisks at the end)
87 # we define a few regular expressions here to detect them
90 \s* # any number of whitespace
91 /\*{2,} # followed by '/' and at least two asterisks
92 \s*$ # probably followed by whitespace
96 \s* # any number of whitespace
97 \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
98 (.*) # then anything (group1)
102 \s* # any number of whitespace
103 \*+/ # followed by at least one asterisk, then '/'
# format 2 uses three distinct patterns: opening line, column line and
# closing line
re_source_block_format2 = SourceBlockFormat( id     = 2,
                                             start  = start,
                                             column = column,
                                             end    = end )
110 # the list of supported documentation block formats, we could add new ones
# iterated in this order when looking for the start of a block
re_source_block_formats = [
    re_source_block_format1,
    re_source_block_format2,
]
117 # the following regular expressions correspond to markup tags
118 # within the documentation comment blocks. they're equivalent
119 # despite their different syntax
121 # notice how each markup tag _must_ begin a new line
# "<xxxx>" form; group 1 captures the tag name
re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )

# "@xxxx:" form; group 1 captures the tag name
re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )

# the list of supported markup tags; further patterns can be appended here
re_markup_tags = [
    re_markup_tag1,
    re_markup_tag2,
]
133 # used to detect a cross-reference, after markup tags have been stripped
# group 1 is the word following '@', group 2 is the rest of the text
re_crossref = re.compile( r'@(\w*)(.*)' )
138 # used to detect italic and bold styles in paragraph text
# _italic_ : group 1 is the emphasized word, group 3 the trailing text
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )

# *bold* : group 1 is the emphasized word, group 3 the trailing text
re_bold = re.compile( r"\*(\w(\w|')*)\*(.*)" )
144 # used to detect the end of commented source lines
# matches optional whitespace followed by an empty C comment ("/* */")
re_source_sep = re.compile( r'\s*/\*\s*\*/' )
149 # used to perform cross-reference within source output
# group 1 is a run of non-word characters, group 2 the word that follows
re_source_crossref = re.compile( r'(\W*)(\w*)' )
154 # a list of reserved source keywords
156 re_source_keywords
= re
.compile( '''\\b ( typedef |
175 \#endif ) \\b''', re
.VERBOSE
)
178 ################################################################
180 ## SOURCE BLOCK CLASS
182 ## A SourceProcessor is in charge of reading a C source file
183 ## and decomposing it into a series of different "SourceBlocks".
184 ## each one of these blocks can be made of the following data:
186 ## - A documentation comment block that starts with "/**" and
187 ## whose exact format will be discussed later
189 ## - normal sources lines, including comments
191 ## the important fields in a text block are the following ones:
193 ## self.lines : a list of text lines for the corresponding block
195 ## self.content : for documentation comment blocks only, this is the
196 ## block content that has been "unboxed" from its
197 ## decoration. This is None for all other blocks
198 ## (i.e. sources or ordinary comments with no starting
203 def __init__( self
, processor
, filename
, lineno
, lines
):
204 self
.processor
= processor
205 self
.filename
= filename
207 self
.lines
= lines
[:]
208 self
.format
= processor
.format
211 if self
.format
== None:
216 # extract comment lines
219 for line0
in self
.lines
:
220 m
= self
.format
.column
.match( line0
)
222 lines
.append( m
.group( 1 ) )
224 # now, look for a markup tag
226 l
= string
.strip( l
)
228 for tag
in re_markup_tags
:
def location( self ):
    """return the block's origin formatted as "(<filename>:<lineno>)"

    The line number is rendered with repr().
    """
    pieces = [ "(", self.filename, ":", repr( self.lineno ), ")" ]
    return "".join( pieces )
236 # debugging only - not used in normal operations
239 print "{{{content start---"
240 for l
in self
.content
:
242 print "---content end}}}"
247 fmt
= repr( self
.format
.id ) + " "
249 for line
in self
.lines
:
254 ################################################################
256 ## SOURCE PROCESSOR CLASS
258 ## The SourceProcessor is in charge of reading a C source file
259 ## and decomposing it into a series of different "SourceBlock"
262 ## each one of these blocks can be made of the following data:
264 ## - A documentation comment block that starts with "/**" and
265 ## whose exact format will be discussed later
267 ## - normal source lines, including comments
270 class SourceProcessor
:
272 def __init__( self
):
273 """initialize a source processor"""
280 """reset a block processor, clean all its blocks"""
284 def parse_file( self
, filename
):
285 """parse a C source file, and add its blocks to the processor's list"""
288 self
.filename
= filename
295 for line
in fileinput
.input( filename
):
296 # strip trailing newlines, important on Windows machines!
297 if line
[-1] == '\012':
300 if self
.format
== None:
301 self
.process_normal_line( line
)
303 if self
.format
.end
.match( line
):
304 # that's a normal block end, add it to 'lines' and
306 self
.lines
.append( line
)
307 self
.add_block_lines()
308 elif self
.format
.column
.match( line
):
309 # that's a normal column line, add it to 'lines'
310 self
.lines
.append( line
)
312 # humm.. this is an unexpected block end,
313 # create a new block, but don't process the line
314 self
.add_block_lines()
316 # we need to process the line again
317 self
.process_normal_line( line
)
319 # record the last lines
320 self
.add_block_lines()
def process_normal_line( self, line ):
    """process a normal line and check whether it is the start of a new block

    line -- one line of source text

    When the line matches the 'start' pattern of one of the supported
    block formats, the lines accumulated so far are flushed through
    add_block_lines(), and the matching format and the current
    fileinput line number are recorded for the block that begins here.
    The line itself is always appended to self.lines.
    """
    for f in re_source_block_formats:
        if f.start.match( line ):
            self.add_block_lines()
            # remember which format opened the block: parse_file tests
            # self.format to decide how the following lines are handled,
            # so it must be set after the flush above
            self.format = f
            self.lineno = fileinput.filelineno()

    self.lines.append( line )
def add_block_lines( self ):
    """add the current accumulated lines and create a new block

    Does nothing when no lines are pending.  Otherwise the pending lines
    are wrapped in a SourceBlock that is appended to self.blocks, and the
    pending lines and current format are cleared so that the next line
    starts a fresh block and is treated as normal text.
    """
    if self.lines:
        # the block is built first: its constructor reads the
        # processor's current format, so that must not be cleared yet
        block = SourceBlock( self, self.filename, self.lineno, self.lines )
        self.blocks.append( block )

        # without this reset every later block would contain the lines
        # of all earlier ones, and block mode would never be left
        self.format = None
        self.lines  = []
341 # debugging only, not used in normal operations
343 """print all blocks in a processor"""
344 for b
in self
.blocks
: