1 # Content (c) 2002, 2004, 2006-2009, 2012, 2013
2 # David Turner <david@freetype.org>
4 # This file contains routines used to parse the content of documentation
5 # comment blocks and build more structured objects out of them.
13 # this regular expression is used to detect code sequences. these
14 # are simply code fragments embedded in '{' and '}' like in:
# note that the indentation of the opening and closing braces must be
# exactly the same. the code sequence can contain braces at greater
# Opening line of a code sequence: optional indentation (captured as
# group 1), a lone '{', then only whitespace up to the end of the line.
re_code_start = re.compile(r"(\s*){\s*$")

# Closing line of a code sequence: same shape as above, with '}'.
re_code_end = re.compile(r"(\s*)}\s*$")
32 # this regular expression is used to isolate identifiers from
# Group 1 captures the leading run of word characters and dashes; the run
# may be empty, so a match at position 0 always succeeds.
re_identifier = re.compile(r'((?:\w|-)*)')
38 # we collect macros ending in `_H'; while outputting the object data, we use
39 # this info together with the object's file location to emit the appropriate
40 # header file macro and name before the object itself
# Matches `#define <NAME>_H <path>` at the start of a line.
# Group 1 is the macro name (ending in `_H'), group 2 the text found
# between the angle brackets.
re_header_macro = re.compile(r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>')
45 #############################################################################
47 # The DocCode class is used to store source code lines.
49 # 'self.lines' contains a set of source code lines that will be dumped as
50 # HTML in a <PRE> tag.
52 # The object is filled line by line by the parser; it strips the leading
53 # "margin" space from each input line before storing it in 'self.lines'.
57 def __init__( self
, margin
, lines
):
61 # remove margin spaces
63 if string
.strip( l
[:margin
] ) == "":
65 self
.lines
.append( l
)
67 def dump( self
, prefix
= "", width
= 60 ):
68 lines
= self
.dump_lines( 0, width
)
72 def dump_lines( self
, margin
= 0, width
= 60 ):
75 result
.append( " " * margin
+ l
)
80 #############################################################################
# The DocPara class is used to store a "normal" text paragraph.
84 # 'self.words' contains the list of words that make up the paragraph
88 def __init__( self
, lines
):
93 self
.words
.extend( string
.split( l
) )
95 def dump( self
, prefix
= "", width
= 60 ):
96 lines
= self
.dump_lines( 0, width
)
100 def dump_lines( self
, margin
= 0, width
= 60 ):
101 cur
= "" # current line
102 col
= 0 # current width
105 for word
in self
.words
:
111 result
.append( " " * margin
+ cur
)
121 result
.append( " " * margin
+ cur
)
127 #############################################################################
129 # The DocField class is used to store a list containing either DocPara or
130 # DocCode objects. Each DocField also has an optional "name" which is used
131 # when the object corresponds to a field or value definition
135 def __init__( self
, name
, lines
):
136 self
.name
= name
# can be None for normal paragraphs/sources
137 self
.items
= [] # list of items
139 mode_none
= 0 # start parsing mode
140 mode_code
= 1 # parsing code sequences
141 mode_para
= 3 # parsing normal paragraph
143 margin
= -1 # current code sequence indentation
146 # now analyze the markup lines to see if they contain paragraphs,
147 # code sequences or fields definitions
153 # are we parsing a code sequence ?
154 if mode
== mode_code
:
155 m
= re_code_end
.match( l
)
156 if m
and len( m
.group( 1 ) ) <= margin
:
157 # that's it, we finished the code sequence
158 code
= DocCode( 0, cur_lines
)
159 self
.items
.append( code
)
164 # nope, continue the code sequence
165 cur_lines
.append( l
[margin
:] )
167 # start of code sequence ?
168 m
= re_code_start
.match( l
)
172 para
= DocPara( cur_lines
)
173 self
.items
.append( para
)
176 # switch to code extraction mode
177 margin
= len( m
.group( 1 ) )
180 if not string
.split( l
) and cur_lines
:
181 # if the line is empty, we end the current paragraph,
183 para
= DocPara( cur_lines
)
184 self
.items
.append( para
)
187 # otherwise, simply add the line to the current
189 cur_lines
.append( l
)
191 if mode
== mode_code
:
192 # unexpected end of code sequence
193 code
= DocCode( margin
, cur_lines
)
194 self
.items
.append( code
)
196 para
= DocPara( cur_lines
)
197 self
.items
.append( para
)
199 def dump( self
, prefix
= "" ):
201 print prefix
+ self
.field
+ " ::"
202 prefix
= prefix
+ "----"
211 def dump_lines( self
, margin
= 0, width
= 60 ):
219 result
.extend( p
.dump_lines( margin
, width
) )
226 # this regular expression is used to detect field definitions
# A field definition is an optional name followed by `::'.  Group 1 is the
# name: either a plain (possibly empty) word, or a dotted name that starts
# and ends with a word character.
re_field = re.compile(r"\s*(\w*|\w(\w|\.)*\w)\s*::")
234 def __init__( self
, tag
, lines
):
235 self
.tag
= string
.lower( tag
)
243 m
= re_field
.match( l
)
245 # we detected the start of a new field definition
247 # first, save the current one
249 f
= DocField( field
, cur_lines
)
250 self
.fields
.append( f
)
254 field
= m
.group( 1 ) # record field name
255 ln
= len( m
.group( 0 ) )
256 l
= " " * ln
+ l
[ln
:]
259 cur_lines
.append( l
)
261 if field
or cur_lines
:
262 f
= DocField( field
, cur_lines
)
263 self
.fields
.append( f
)
265 def get_name( self
):
267 return self
.fields
[0].items
[0].words
[0]
271 def dump( self
, margin
):
272 print " " * margin
+ "<" + self
.tag
+ ">"
273 for f
in self
.fields
:
275 print " " * margin
+ "</" + self
.tag
+ ">"
281 def __init__( self
, block
):
285 self
.name
= block
.name
286 self
.title
= block
.get_markup_words( "title" )
287 self
.order
= block
.get_markup_words( "sections" )
290 self
.title
= string
.split( "Miscellaneous" )
297 def __init__( self
, name
= "Other" ):
300 self
.block_names
= [] # ordered block names in section
303 self
.description
= ""
def add_def(self, block):
    """Record `block` at the end of this object's `defs' list."""
    definitions = self.defs
    definitions.append(block)
def add_block(self, block):
    """Register `block' under its name.

    The name is appended to `self.block_names' and the block itself is
    stored in the `self.blocks' mapping, keyed by that name.
    """
    key = block.name
    self.block_names.append(key)
    self.blocks[key] = block
316 # look up one block that contains a valid section description
317 for block
in self
.defs
:
318 title
= block
.get_markup_text( "title" )
321 self
.abstract
= block
.get_markup_words( "abstract" )
322 self
.description
= block
.get_markup_items( "description" )
323 self
.order
= block
.get_markup_words( "order" )
327 self
.block_names
= sort_order_list( self
.block_names
, self
.order
)
331 class ContentProcessor
:
333 def __init__( self
):
334 """initialize a block content processor"""
337 self
.sections
= {} # dictionary of documentation sections
338 self
.section
= None # current documentation section
340 self
.chapters
= [] # list of chapters
342 self
.headers
= {} # dictionary of header macros
def set_section(self, section_name):
    """set current section during parsing

    If `section_name' is not yet a key of `self.sections', a new
    DocSection is created for it and stored there.  In both cases
    `self.section' ends up referring to the entry for `section_name'.
    """
    # `in' replaces the deprecated dict.has_key(), which no longer
    # exists in Python 3; the result is the same on Python 2.
    if section_name not in self.sections:
        self.sections[section_name] = DocSection(section_name)

    self.section = self.sections[section_name]
def add_chapter(self, block):
    """Wrap `block' in a new DocChapter and append it to `self.chapters'."""
    self.chapters.append(DocChapter(block))
359 """reset the content processor for a new block"""
362 self
.markup_lines
= []
364 def add_markup( self
):
365 """add a new markup section"""
366 if self
.markup
and self
.markup_lines
:
368 # get rid of last line of markup if it's empty
369 marks
= self
.markup_lines
370 if len( marks
) > 0 and not string
.strip( marks
[-1] ):
371 self
.markup_lines
= marks
[:-1]
373 m
= DocMarkup( self
.markup
, self
.markup_lines
)
375 self
.markups
.append( m
)
378 self
.markup_lines
= []
380 def process_content( self
, content
):
381 """process a block content and return a list of DocMarkup objects
382 corresponding to it"""
389 for t
in re_markup_tags
:
392 found
= string
.lower( m
.group( 1 ) )
393 prefix
= len( m
.group( 0 ) )
394 line
= " " * prefix
+ line
[prefix
:] # remove markup from line
397 # is it the start of a new markup section ?
400 self
.add_markup() # add current markup content
402 if len( string
.strip( line
) ) > 0:
403 self
.markup_lines
.append( line
)
405 self
.markup_lines
.append( line
)
411 def parse_sources( self
, source_processor
):
412 blocks
= source_processor
.blocks
413 count
= len( blocks
)
415 for n
in range( count
):
418 # this is a documentation comment, we need to catch
419 # all following normal blocks in the "follow" list
423 while m
< count
and not blocks
[m
].content
:
424 follow
.append( blocks
[m
] )
427 doc_block
= DocBlock( source
, follow
, self
)
430 # process all sections to extract their abstract, description
431 # and ordered list of items
433 for sec
in self
.sections
.values():
436 # process chapters to check that all sections are correctly
438 for chap
in self
.chapters
:
439 for sec
in chap
.order
:
440 if self
.sections
.has_key( sec
):
441 section
= self
.sections
[sec
]
442 section
.chapter
= chap
444 chap
.sections
.append( section
)
446 sys
.stderr
.write( "WARNING: chapter '" + \
447 chap
.name
+ "' in " + chap
.block
.location() + \
448 " lists unknown section '" + sec
+ "'\n" )
450 # check that all sections are in a chapter
453 for sec
in self
.sections
.values():
457 # create a new special chapter for all remaining sections
461 chap
= DocChapter( None )
462 chap
.sections
= others
463 self
.chapters
.append( chap
)
469 def __init__( self
, source
, follow
, processor
):
474 self
.type = "ERRTYPE"
475 self
.name
= "ERRNAME"
476 self
.section
= processor
.section
477 self
.markups
= processor
.process_content( source
.content
)
479 # compute block type from first markup tag
481 self
.type = self
.markups
[0].tag
485 # compute block name from first markup paragraph
487 markup
= self
.markups
[0]
488 para
= markup
.fields
[0].items
[0]
490 m
= re_identifier
.match( name
)
497 if self
.type == "section":
498 # detect new section starts
499 processor
.set_section( self
.name
)
500 processor
.section
.add_def( self
)
501 elif self
.type == "chapter":
503 processor
.add_chapter( self
)
505 processor
.section
.add_block( self
)
507 # now, compute the source lines relevant to this documentation
508 # block. We keep normal comments in for obvious reasons (??)
514 # collect header macro definitions
515 m
= re_header_macro
.match( l
)
517 processor
.headers
[m
.group( 2 )] = m
.group( 1 );
519 # we use "/* */" as a separator
520 if re_source_sep
.match( l
):
524 # now strip the leading and trailing empty lines from the sources
526 end
= len( source
) - 1
528 while start
< end
and not string
.strip( source
[start
] ):
531 while start
< end
and not string
.strip( source
[end
] ):
534 if start
== end
and not string
.strip( source
[start
] ):
537 self
.code
= source
[start
:end
+ 1]
def location(self):
    """Return whatever `self.source.location()' reports for this block."""
    origin = self.source
    return origin.location()
542 def get_markup( self
, tag_name
):
543 """return the DocMarkup corresponding to a given tag in a block"""
544 for m
in self
.markups
:
545 if m
.tag
== string
.lower( tag_name
):
549 def get_markup_words( self
, tag_name
):
551 m
= self
.get_markup( tag_name
)
552 return m
.fields
[0].items
[0].words
def get_markup_text(self, tag_name):
    """Return the words of markup `tag_name' joined into one string.

    The words come from `self.get_markup_words( tag_name )' and are
    separated by single spaces; an empty word list gives "".
    """
    words = self.get_markup_words(tag_name)
    # str.join replaces the deprecated string.join(), which is gone in
    # Python 3; a single space was string.join()'s default separator.
    return " ".join(words)
560 def get_markup_items( self
, tag_name
):
562 m
= self
.get_markup( tag_name
)
563 return m
.fields
[0].items