zenilib  0.5.3.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
sources.py
Go to the documentation of this file.
1 # Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009
2 # David Turner <david@freetype.org>
3 #
4 #
5 # this file contains definitions of classes needed to decompose
6 # C source files into a series of multi-line "blocks". There are
7 # two kinds of blocks:
8 #
9 # - normal blocks, which contain source code or ordinary comments
10 #
11 # - documentation blocks, which have restricted formatting, and
12 # whose text always starts with a documentation markup tag like
13 # "<Function>", "<Type>", etc.
14 #
15 # the routines used to process the content of documentation blocks
16 # are not contained here, but in "content.py"
17 #
18 # the classes and methods found here only deal with text parsing
19 # and basic documentation block extraction
20 #
21 
22 import fileinput, re, sys, os, string
23 
24 
25 
26 ################################################################
27 ##
28 ## BLOCK FORMAT PATTERN
29 ##
30 ## A simple class containing compiled regular expressions used
31 ## to detect potential documentation format block comments within
32 ## C source code
33 ##
34 ## note that the 'column' pattern must contain a group that will
35 ## be used to "unbox" the content of documentation comment blocks
36 ##
38 
class SourceBlockFormat:
    """Compiled regular expressions describing one documentation block format.

    An instance bundles the three patterns used to recognize a potential
    documentation comment block in C source code:

      self.id     : the identifier given by the caller, stored unchanged
      self.start  : matches the first line of a block
      self.column : matches an interior line of a block; it must contain
                    a group that is used to "unbox" the content of the
                    documentation comment block
      self.end    : matches the last line of a block
    """

    def __init__( self, id, start, column, end ):
        """create a block pattern, used to recognize special documentation blocks

        'start', 'column' and 'end' are regular expression sources; each
        one is compiled with re.VERBOSE, so whitespace and '#' comments
        inside them are ignored
        """
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )
45 
46 
47 
48 #
49 # format 1 documentation comment blocks look like the following:
50 #
51 # /************************************/
52 # /* */
53 # /* */
54 # /* */
55 # /************************************/
56 #
57 # we define a few regular expressions here to detect them
58 #
59 
# NOTE: the patterns below are compiled with re.VERBOSE (see
# SourceBlockFormat), so whitespace and '#' comments inside the literals
# are ignored -- but any other stray text would become part of the pattern

start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

# a format 1 block ends with the same kind of line it starts with
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )


#
# format 2 documentation comment blocks look like the following:
#
#    /************************************ (at least 2 asterisks)
#     *
#     *
#     *
#     *
#     **/       (1 or more asterisks at the end)
#
# we define a few regular expressions here to detect them
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

column = r'''
  \s*        # any number of whitespace
  \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  (.*)       # then anything (group1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


#
# the list of supported documentation block formats, we could add new ones
# relatively easily
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
114 
115 
116 #
117 # the following regular expressions correspond to markup tags
118 # within the documentation comment blocks. they're equivalent
119 # despite their different syntax
120 #
121 # notice how each markup tag _must_ begin a new line
122 #
re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@(\w*):''' )  # @xxxx: format

#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]

#
# used to detect a cross-reference, after markup tags have been stripped
#
re_crossref = re.compile( r'@(\w*)(.*)' )

#
# used to detect italic and bold styles in paragraph text
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     # _italic_
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   # *bold*

#
# used to detect the end of commented source lines
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

#
# used to perform cross-reference within source output
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )

#
# a list of reserved source keywords
#
# the pattern is written as a raw string so that '\b' and '\#' reach the
# regular expression engine unchanged; it is compiled with re.VERBOSE,
# which is why '#' must be escaped and why the layout whitespace is ignored
#
# NOTE(review): the leading '\b' also applies to the '#'-prefixed
# alternatives, so those only match when a word character immediately
# precedes the '#' -- confirm that this is intended
#
re_source_keywords = re.compile( r'''\b ( typedef   |
                                          struct    |
                                          enum      |
                                          union     |
                                          const     |
                                          char      |
                                          int       |
                                          short     |
                                          long      |
                                          void      |
                                          signed    |
                                          unsigned  |
                                          \#include |
                                          \#define  |
                                          \#undef   |
                                          \#if      |
                                          \#ifdef   |
                                          \#ifndef  |
                                          \#else    |
                                          \#endif   ) \b''', re.VERBOSE )
176 
177 
178 ################################################################
179 ##
180 ## SOURCE BLOCK CLASS
181 ##
182 ## A SourceProcessor is in charge of reading a C source file
183 ## and decomposing it into a series of different "SourceBlocks".
184 ## each one of these blocks can be made of the following data:
185 ##
186 ## - A documentation comment block that starts with "/**" and
187 ## whose exact format will be discussed later
188 ##
189 ## - normal source lines, including comments
190 ##
191 ## the important fields in a text block are the following ones:
192 ##
193 ## self.lines : a list of text lines for the corresponding block
194 ##
195 ## self.content : for documentation comment blocks only, this is the
196 ## block content that has been "unboxed" from its
197 ## decoration. This is an empty list for all other blocks
198 ## (i.e. sources or ordinary comments with no starting
199 ## markup tag)
200 ##
202 
class SourceBlock:
    """One block of consecutive lines taken from a C source file.

    Fields set by the constructor:

      self.processor : the object that created the block
      self.filename  : name of the file the lines come from
      self.lineno    : line number recorded for the block
      self.lines     : a private copy of the block's text lines
      self.format    : the processor's current block format, or None
      self.content   : the "unboxed" comment lines when the block is a
                       documentation block (i.e. at least one of them
                       starts with a markup tag), an empty list otherwise
    """

    def __init__( self, processor, filename, lineno, lines ):
        """create a block from 'lines' and detect documentation content

        'processor' only needs a 'format' attribute; when it is not None
        its 'column' pattern is used to unbox the comment lines
        """
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]       # copy, the caller reuses its list
        self.format    = processor.format
        self.content   = []

        # a block without a format is plain source, nothing to unbox
        if self.format is None:
            return

        # extract comment lines
        lines = []

        for line0 in self.lines:
            m = self.format.column.match( line0 )
            if m:
                lines.append( m.group( 1 ) )

        # now, look for a markup tag; the str method is used instead of
        # string.strip(), which no longer exists in Python 3
        for l in lines:
            l = l.strip()
            if len( l ) > 0:
                for tag in re_markup_tags:
                    if tag.match( l ):
                        self.content = lines
                        return

    def location( self ):
        """return the block's position as the string "(filename:lineno)" """
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only - not used in normal operations
    def dump( self ):
        """print the unboxed content if there is any, else the raw lines"""
        # single-argument print( x ) behaves the same in Python 2 and 3
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )
251 
252 
253 
254 ################################################################
255 ##
256 ## SOURCE PROCESSOR CLASS
257 ##
258 ## The SourceProcessor is in charge of reading a C source file
259 ## and decomposing it into a series of different "SourceBlock"
260 ## objects.
261 ##
262 ## each one of these blocks can be made of the following data:
263 ##
264 ## - A documentation comment block that starts with "/**" and
265 ## whose exact format will be discussed later
266 ##
267 ## - normal source lines, including comments
268 ##
269 ##
271 
class SourceProcessor:
    """Read a C source file and decompose it into SourceBlock objects.

    Fields:

      self.blocks   : the list of blocks created so far
      self.filename : name of the file being parsed, or None
      self.format   : block format of the documentation block currently
                      being accumulated, or None for normal lines
      self.lineno   : line number where the current format block started
      self.lines    : lines accumulated for the block under construction
    """

    def __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        # set here too, so add_block_lines() works before any parse_file()
        self.lineno   = 0
        self.lines    = []

    def reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """parse a C source file, and add its blocks to the processor's list"""
        self.reset()

        self.filename = filename

        # fileinput keeps module-level state, make sure nothing is active
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\012' ):
                    line = line[0:-1]

                if self.format is None:
                    self.process_normal_line( line )
                elif self.format.end.match( line ):
                    # that's a normal block end, add it to 'lines' and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()
                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )
                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )
        finally:
            # do not leave the file open if something above raised
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """process a normal line and check whether it is the start of a new block"""
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush what was accumulated, then start a block of format 'f'
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def add_block_lines( self ):
        """add the current accumulated lines and create a new block"""
        if self.lines:
            block = SourceBlock( self, self.filename, self.lineno, self.lines )

            self.blocks.append( block )
            self.format = None
            self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()
345  b.dump()
346 
347 # eof
BLOCK FORMAT PATTERN.
Definition: sources.py:37
SOURCE BLOCK CLASS.
Definition: sources.py:201
SOURCE PROCESSOR CLASS.
Definition: sources.py:270