4 # Convert source code comments to multi-line blocks (library file).
6 # Copyright 2002-2016 by
9 # This file is part of the FreeType project, and may only be used,
10 # modified, and distributed under the terms of the FreeType project
11 # license, LICENSE.TXT. By continuing to use, modify, or distribute
12 # this file you indicate that you have read the license and
13 # understand and accept it fully.
# This library file contains definitions of classes needed to decompose C
# source code files into a series of multi-line `blocks'.  There are two
# kinds of blocks.
#
#   - Normal blocks, which contain source code or ordinary comments.
#
#   - Documentation blocks, which have restricted formatting, and whose text
#     always starts with a documentation markup tag such as `<Function>'.
26 # The routines to process the content of documentation blocks are contained
27 # in file `content.py'; the classes and methods found here only deal with
28 # text parsing and basic documentation block extraction.
32 import fileinput
, re
, sys
, os
, string
35 ################################################################
37 ## SOURCE BLOCK FORMAT CLASS
39 ## A simple class containing compiled regular expressions to detect
40 ## potential documentation format block comments within C source code.
42 ## The `column' pattern must contain a group to `unbox' the content of
43 ## documentation comment blocks.
45 ## Later on, paragraphs are converted to long lines, which simplifies the
46 ## regular expressions that act upon the text.
class  SourceBlockFormat:
    """Hold the compiled patterns that describe one comment block layout."""

    def  __init__( self, id, start, column, end ):
        """Create a block pattern, used to recognize special documentation
           blocks.

           id     -- identifier of the format; stored unchanged as `self.id'.
           start  -- pattern source matched against the first line of a
                     block.
           column -- pattern source matched against the lines inside a
                     block; it must contain a group that `unboxes' the
                     line's content.
           end    -- pattern source matched against the last line of a
                     block.

           All three patterns are compiled with `re.VERBOSE', so whitespace
           and `#' comments inside the pattern sources are ignored."""
        # Keep the identifier: code that dumps blocks reads `format.id'.
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )
60 # Format 1 documentation comment blocks.
62 # /************************************/ (at least 2 asterisks)
66 # /************************************/ (at least 2 asterisks)
# Pattern for the opening line of a format 1 block.  In this layout the
# closing line looks exactly the same, so `start' is passed a second time
# as the end pattern below.
start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

# Pattern for an interior line; group 1 is the text between `/*' and `*/'.
column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
86 # Format 2 documentation comment blocks.
88 # /************************************ (at least 2 asterisks)
92 # */ (1 or more asterisks)
# Pattern for the opening line of a format 2 block.
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

# Pattern for an interior line; group 1 is everything after the asterisk.
column = r'''
  \s*           # any number of whitespace
  \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
  (.*)          # then anything (group1)
'''

# Pattern for the closing line of a format 2 block.
end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
# The list of supported documentation block formats.  We could add new ones
# quite easily.
# Formats are tried in this order when looking for the start of a block.
re_source_block_formats = [
    re_source_block_format1,
    re_source_block_format2,
]
# The following regular expressions correspond to markup tags within the
# documentation comment blocks.  They are equivalent despite their different
# syntax.
126 # A markup tag consists of letters or character `-', to be found in group 1.
128 # Notice that a markup tag _must_ begin a new paragraph.
# Both patterns allow leading whitespace and capture the tag name (word
# characters and `-', possibly empty) in group 1.
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# The list of supported markup tags.  We could add new ones quite easily.
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]
140 # A regular expression to detect a cross reference, after markup tags have
143 # Two syntax forms are supported:
148 # where both `<name>' and `<id>' consist of alphanumeric characters, `_',
149 # and `-'. Use `<id>' if there are multiple, valid `<name>' entries.
153 re_crossref
= re
.compile( r
"""
161 # Two regular expressions to detect italic and bold markup, respectively.
162 # Group 1 is the markup, group 2 the rest of the line.
# Note that the markup is limited to words consisting of letters, digits,
# the characters `_' and `-', or an apostrophe (but not as the first
# character).
# In both patterns the first character of the marked-up word must be a word
# character or `-'; an apostrophe is accepted only from the second
# character on.  Group 1 is the word, group 2 whatever follows the closing
# delimiter.
re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*
172 # This regular expression code to identify an URL has been taken from
174 # http://mail.python.org/pipermail/tutor/2002-September/017228.html
176 # (with slight modifications).
178 urls
= r
'(?:https?|telnet|gopher|file|wais|ftp)'
180 gunk
= r
'/#~:.?+=&%@!\-'
182 any
= "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs
,
187 \b # start at word boundary
188 %(urls)s : # need resource and a colon
189 [%(any)s] +? # followed by one or more of any valid
190 # character, but be conservative and
191 # take only what you need to...
192 (?= # [look-ahead non-consumptive assertion]
193 [%(punc)s]* # either 0 or more punctuation
194 (?: # [non-grouping parentheses]
195 [^%(any)s] | $ # followed by a non-url char
196 # or end of the string
200 """ % {'urls' : urls
,
204 re_url
= re
.compile( url
, re
.VERBOSE | re
.MULTILINE
)
207 # A regular expression that stops collection of comments for the current
# Matches an empty C comment, optionally preceded by whitespace and with
# optional whitespace between the `/*' and the `*/'.
re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */
213 # A regular expression to find possible C identifiers while outputting
214 # source code verbatim, covering things like `*foo' or `(bar'. Group 1 is
215 # the prefix, group 2 the identifier -- since we scan lines from left to
216 # right, sequentially splitting the source code into prefix and identifier
217 # is fully sufficient for our purposes.
# Group 1 takes the run of non-word characters at the current position,
# group 2 the run of word characters that follows; either may be empty.
re_source_crossref = re.compile( r'(\W*)(\w*)' )
222 # A regular expression that matches a list of reserved C source keywords.
224 re_source_keywords
= re
.compile( '''\\b ( typedef |
243 \#endif ) \\b''', re
.VERBOSE
)
246 ################################################################
248 ## SOURCE BLOCK CLASS
250 ## There are two important fields in a `SourceBlock' object.
253 ## A list of text lines for the corresponding block.
256 ## For documentation comment blocks only, this is the block content
257 ## that has been `unboxed' from its decoration. This is `None' for all
258 ## other blocks (i.e., sources or ordinary comments with no starting
263 def __init__( self
, processor
, filename
, lineno
, lines
):
264 self
.processor
= processor
265 self
.filename
= filename
267 self
.lines
= lines
[:]
268 self
.format
= processor
.format
271 if self
.format
== None:
276 # extract comment lines
279 for line0
in self
.lines
:
280 m
= self
.format
.column
.match( line0
)
282 lines
.append( m
.group( 1 ) )
284 # now, look for a markup tag
286 l
= string
.strip( l
)
288 for tag
in re_markup_tags
:
def  location( self ):
    """Return the block's position as the string `(<filename>:<lineno>)'.

       The line number is rendered with `repr', the file name is used
       as is."""
    return "(" + self.filename + ":" + repr( self.lineno ) + ")"
296 # debugging only -- not used in normal operations
299 print "{{{content start---"
300 for l
in self
.content
:
302 print "---content end}}}"
307 fmt
= repr( self
.format
.id ) + " "
309 for line
in self
.lines
:
313 ################################################################
315 ## SOURCE PROCESSOR CLASS
317 ## The `SourceProcessor' is in charge of reading a C source file and
318 ## decomposing it into a series of different `SourceBlock' objects.
320 ## A SourceBlock object consists of the following data.
322 ## - A documentation comment block using one of the layouts above. Its
323 ## exact format will be discussed later.
325 ## - Normal sources lines, including comments.
328 class SourceProcessor
:
330 def __init__( self
):
331 """Initialize a source processor."""
338 """Reset a block processor and clean up all its blocks."""
342 def parse_file( self
, filename
):
343 """Parse a C source file and add its blocks to the processor's
347 self
.filename
= filename
354 for line
in fileinput
.input( filename
):
355 # strip trailing newlines, important on Windows machines!
356 if line
[-1] == '\012':
359 if self
.format
== None:
360 self
.process_normal_line( line
)
362 if self
.format
.end
.match( line
):
363 # A normal block end. Add it to `lines' and create a
365 self
.lines
.append( line
)
366 self
.add_block_lines()
367 elif self
.format
.column
.match( line
):
368 # A normal column line. Add it to `lines'.
369 self
.lines
.append( line
)
371 # An unexpected block end. Create a new block, but
372 # don't process the line.
373 self
.add_block_lines()
375 # we need to process the line again
376 self
.process_normal_line( line
)
378 # record the last lines
379 self
.add_block_lines()
381 def process_normal_line( self
, line
):
382 """Process a normal line and check whether it is the start of a new
384 for f
in re_source_block_formats
:
385 if f
.start
.match( line
):
386 self
.add_block_lines()
388 self
.lineno
= fileinput
.filelineno()
390 self
.lines
.append( line
)
392 def add_block_lines( self
):
393 """Add the current accumulated lines and create a new block."""
395 block
= SourceBlock( self
,
400 self
.blocks
.append( block
)
404 # debugging only, not used in normal operations
406 """Print all blocks in a processor."""
407 for b
in self
.blocks
: