Coverage for src/c2puml/core/preprocessor.py: 68%

294 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-20 03:53 +0000

1#!/usr/bin/env python3 

2""" 

3Preprocessor module for C to PlantUML converter. 

4Handles #if, #elif, #else, #endif directives and conditional compilation. 

5""" 

6 

7import logging 

8import re 

9from dataclasses import dataclass 

10from enum import Enum 

11from typing import Dict, List, Optional, Set, Tuple 

12 

13from .parser_tokenizer import Token, TokenType 

14 

15 

class PreprocessorDirective(Enum):
    """Preprocessor directives recognised by this module.

    Each member's value is the directive keyword as written in C source,
    without the leading ``#``.
    """

    # Conditional-compilation directives.
    IF = "if"
    ELIF = "elif"
    ELSE = "else"
    ENDIF = "endif"
    IFDEF = "ifdef"
    IFNDEF = "ifndef"

    # Macro definition directives.
    DEFINE = "define"
    UNDEF = "undef"

27 

28 

@dataclass
class PreprocessorBlock:
    """One conditional region of a token stream.

    Attributes:
        directive: The directive that introduced the region.
        condition: The condition text of the directive; for ``#ifdef`` and
            ``#ifndef`` this is the macro name.
        start_token: Index into the token list of the directive token that
            starts the region.
        end_token: Index into the token list of the last token belonging to
            the region; ``-1`` until the parser has found the end.
        is_active: Whether the region's content is kept.
        children: Conditional regions nested directly inside this one.
        parent: The enclosing region, or ``None`` for a top-level region.
    """

    directive: PreprocessorDirective
    condition: str
    start_token: int
    end_token: int
    is_active: bool
    children: List["PreprocessorBlock"]
    parent: Optional["PreprocessorBlock"] = None

40 

41 

class PreprocessorEvaluator:
    """Evaluate C preprocessor conditionals and drop tokens in inactive branches.

    The evaluator keeps a table of defined macros, parses the conditional
    directives found in a token list into a tree of ``PreprocessorBlock``
    objects (one block per branch), and filters out every token that lies in
    a branch whose condition is not satisfied.

    Expression support is intentionally small: ``defined``, ``!``, ``&&``,
    ``||``, parentheses and the six comparison operators. Anything the
    evaluator cannot interpret (arithmetic, bitwise operators, ``?:``) is
    treated as true so that unknown code is kept rather than dropped.
    """

    _IDENTIFIER = r"[a-zA-Z_][a-zA-Z0-9_]*"
    # ``defined(NAME)`` and ``defined NAME``; ``\b`` keeps ``undefined(X)`` out.
    _DEFINED_RE = re.compile(
        r"\bdefined(?:\s*\(\s*(" + _IDENTIFIER + r")\s*\)|\s+(" + _IDENTIFIER + r"))"
    )
    _KEYWORD_RE = re.compile(r"\s*#\s*([a-zA-Z_]+)")
    # ``(.*)`` may be empty, so ``#define FOO`` keeps its full name.
    _DEFINE_RE = re.compile(r"#\s*define\s+(" + _IDENTIFIER + r")(.*)")
    _UNDEF_RE = re.compile(r"#\s*undef\s+(" + _IDENTIFIER + r")")
    _COMMENT_RE = re.compile(r"/\*.*?\*/|//[^\n]*", re.DOTALL)
    _CONTINUATION_RE = re.compile(r"\\\r?\n")
    _INT_RE = re.compile(r"([+-]?)(0[xX][0-9a-fA-F]+|0[bB][01]+|[0-9]+)[uUlL]*")
    _FLOAT_RE = re.compile(
        r"[+-]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:[eE][+-]?[0-9]+)?"
    )

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.defined_macros: Set[str] = set()
        self.macro_values: Dict[str, str] = {}
        self.blocks: List["PreprocessorBlock"] = []
        self.current_block_stack: List["PreprocessorBlock"] = []

    # ------------------------------------------------------------------
    # Macro table
    # ------------------------------------------------------------------

    def add_define(self, name: str, value: str = ""):
        """Register ``name`` as defined, replacing any earlier definition.

        Args:
            name: Macro name.
            value: Replacement text; an empty value defines the macro
                without a replacement.
        """
        self.defined_macros.add(name)
        if value:
            self.macro_values[name] = value
        else:
            # A redefinition without a value must not keep the old value.
            self.macro_values.pop(name, None)

    def add_undef(self, name: str):
        """Forget ``name``; unknown names are ignored."""
        self.defined_macros.discard(name)
        self.macro_values.pop(name, None)

    def is_defined(self, name: str) -> bool:
        """Return True when ``name`` is currently defined."""
        return name in self.defined_macros

    def get_macro_value(self, name: str) -> str:
        """Return the replacement text of ``name``, or ``""`` if it has none."""
        return self.macro_values.get(name, "")

    # ------------------------------------------------------------------
    # Condition evaluation
    # ------------------------------------------------------------------

    def evaluate_condition(self, condition: str) -> bool:
        """Evaluate the condition text of an ``#if`` / ``#elif`` directive.

        ``defined`` operators are resolved first (so their operand is not
        macro-expanded), then macros with a value are substituted, then the
        resulting expression is evaluated.

        Args:
            condition: Condition text without the directive keyword.

        Returns:
            The truth value of the condition. An empty condition and any
            expression the evaluator cannot interpret are treated as true.
        """
        if not condition.strip():
            return True

        condition = self._expand_defined_operator(condition)
        condition = self._expand_macros(condition)
        return self._evaluate_simple_expression(condition)

    def _expand_defined_operator(self, condition: str) -> str:
        """Replace ``defined(NAME)`` / ``defined NAME`` with ``1`` or ``0``."""

        def replace_defined(match):
            macro_name = match.group(1) or match.group(2)
            return "1" if self.is_defined(macro_name) else "0"

        return self._DEFINED_RE.sub(replace_defined, condition)

    def _expand_macros(self, condition: str) -> str:
        """Substitute every macro that has a value by its replacement text.

        This is a single pass in table order; replacement text is not
        rescanned for further macros.
        """
        for macro_name, macro_value in self.macro_values.items():
            pattern = r"\b" + re.escape(macro_name) + r"\b"
            # A callable replacement inserts the value literally; passing the
            # value as a template would interpret backslashes in it.
            condition = re.sub(
                pattern, lambda _match, text=macro_value: text, condition
            )
        return condition

    def _evaluate_simple_expression(self, condition: str) -> bool:
        """Evaluate an already expanded expression.

        Operators are applied from lowest to highest precedence: ``||``,
        ``&&``, ``==`` / ``!=``, ``>=`` / ``<=`` / ``>`` / ``<``, unary ``!``.
        Operators inside parentheses are only seen once the enclosing
        parentheses have been stripped.

        Returns:
            The truth value; True for expressions that cannot be interpreted
            (kept for backward compatibility).
        """
        expression = self._strip_outer_parens(condition)

        lowered = expression.lower()
        if lowered in ("1", "true", "yes"):
            return True
        if lowered in ("0", "false", "no"):
            return False

        alternatives = self._split_top_level(expression, "||")
        if len(alternatives) > 1:
            return any(
                self._evaluate_simple_expression(part) for part in alternatives
            )

        conjuncts = self._split_top_level(expression, "&&")
        if len(conjuncts) > 1:
            return all(
                self._evaluate_simple_expression(part) for part in conjuncts
            )

        found = self._find_top_level(expression, ("==", "!="))
        if found is not None:
            position, operator = found
            return self._evaluate_equality(
                expression[:position],
                expression[position + len(operator):],
                negate=operator == "!=",
            )

        # Two-character operators are listed first so ">=" is not read as ">".
        found = self._find_top_level(expression, (">=", "<=", ">", "<"))
        if found is not None:
            position, operator = found
            return self._evaluate_relational(
                expression[:position],
                operator,
                expression[position + len(operator):],
            )

        if expression.startswith("!"):
            return not self._evaluate_simple_expression(expression[1:])

        # A bare macro name that is defined (without a value) counts as true.
        if expression in self.defined_macros:
            return True

        number = self._to_number(expression)
        if number is not None:
            return bool(number)

        # Default to True for unknown conditions (backward compatibility).
        return True

    def _evaluate_equality(self, left: str, right: str, negate: bool) -> bool:
        """Evaluate ``left == right`` (or ``!=`` when ``negate`` is set).

        Operands that name a defined macro are replaced by the macro's value.
        Two numeric operands are compared numerically; otherwise the operands
        are compared as text.
        """
        left = self._strip_outer_parens(left)
        right = self._strip_outer_parens(right)
        left_known = left in self.defined_macros
        right_known = right in self.defined_macros
        left_value = self.get_macro_value(left) if left_known else left
        right_value = self.get_macro_value(right) if right_known else right

        left_number = self._to_number(left_value)
        right_number = self._to_number(right_value)
        if left_number is not None and right_number is not None:
            equal = left_number == right_number
        elif left_known or right_known or negate:
            equal = left_value == right_value
        else:
            # "==" between two unknown, non-numeric operands stays lenient
            # (true), as it always has been in this module.
            equal = True
        return not equal if negate else equal

    def _evaluate_relational(self, left: str, operator: str, right: str) -> bool:
        """Evaluate an ordering comparison; False if an operand is not numeric."""
        left_number = self._to_number(
            self._evaluate_operand(self._strip_outer_parens(left))
        )
        right_number = self._to_number(
            self._evaluate_operand(self._strip_outer_parens(right))
        )
        if left_number is None or right_number is None:
            return False
        if operator == ">=":
            return left_number >= right_number
        if operator == "<=":
            return left_number <= right_number
        if operator == ">":
            return left_number > right_number
        return left_number < right_number

    def _evaluate_operand(self, operand: str) -> str:
        """Return the macro value for a defined macro, else the operand itself."""
        operand = operand.strip()
        if operand in self.defined_macros:
            return self.get_macro_value(operand)
        return operand

    @classmethod
    def _to_number(cls, text: str) -> Optional[float]:
        """Parse a C numeric literal; return None if ``text`` is not one.

        Handles decimal, hexadecimal, binary and octal integers with optional
        ``u`` / ``l`` suffixes, and plain decimal floats.
        """
        literal = text.strip()
        match = cls._INT_RE.fullmatch(literal)
        if match:
            sign, digits = match.groups()
            digits = digits.lower()
            if digits.startswith("0x"):
                base = 16
            elif digits.startswith("0b"):
                base = 2
            elif len(digits) > 1 and digits.startswith("0"):
                base = 8
            else:
                base = 10
            try:
                magnitude = int(digits, base)
            except ValueError:
                magnitude = None  # e.g. "08" is not valid octal
            if magnitude is not None:
                return -magnitude if sign == "-" else magnitude
        if cls._FLOAT_RE.fullmatch(literal):
            return float(literal)
        return None

    @staticmethod
    def _strip_outer_parens(expression: str) -> str:
        """Strip whitespace and parentheses that wrap the whole expression."""
        text = expression.strip()
        while len(text) >= 2 and text[0] == "(" and text[-1] == ")":
            depth = 0
            closing = len(text) - 1
            for position, char in enumerate(text):
                if char == "(":
                    depth += 1
                elif char == ")":
                    depth -= 1
                    if depth == 0:
                        closing = position
                        break
            if closing != len(text) - 1:
                # The first "(" closes early, as in "(a) && (b)".
                break
            text = text[1:-1].strip()
        return text

    @staticmethod
    def _split_top_level(expression: str, operator: str) -> List[str]:
        """Split ``expression`` at every ``operator`` outside parentheses."""
        parts = []
        depth = 0
        start = 0
        position = 0
        while position < len(expression):
            char = expression[position]
            if char == "(":
                depth += 1
            elif char == ")":
                depth = max(depth - 1, 0)
            elif depth == 0 and expression.startswith(operator, position):
                parts.append(expression[start:position])
                position += len(operator)
                start = position
                continue
            position += 1
        parts.append(expression[start:])
        return parts

    @staticmethod
    def _find_top_level(
        expression: str, operators: Tuple[str, ...]
    ) -> Optional[Tuple[int, str]]:
        """Locate the first of ``operators`` that occurs outside parentheses.

        Returns:
            ``(index, operator)`` or None. Earlier entries of ``operators``
            win when several match at the same index.
        """
        depth = 0
        for position, char in enumerate(expression):
            if char == "(":
                depth += 1
            elif char == ")":
                depth = max(depth - 1, 0)
            elif depth == 0:
                for operator in operators:
                    if expression.startswith(operator, position):
                        return position, operator
        return None

    # ------------------------------------------------------------------
    # Block parsing
    # ------------------------------------------------------------------

    def parse_preprocessor_blocks(
        self, tokens: List["Token"]
    ) -> List["PreprocessorBlock"]:
        """Build the tree of conditional branches found in ``tokens``.

        Every branch of a conditional (``#if`` / ``#ifdef`` / ``#ifndef``,
        each ``#elif``, and ``#else``) becomes its own block. The branches of
        one conditional are siblings; conditionals nested inside a branch are
        that branch's children. A branch spans from its directive token up to
        the token before the next branch directive, or up to the ``#endif``
        token for the last branch.

        ``#define`` and ``#undef`` update the macro table as they are met,
        but only when they lie in an active region. Directives that are not
        members of ``PreprocessorDirective`` are ignored.

        Args:
            tokens: Token list; only tokens of type
                ``TokenType.PREPROCESSOR`` are inspected.

        Returns:
            The top-level blocks in source order.
        """
        blocks: List["PreprocessorBlock"] = []
        # Innermost open branch of every conditional that is still open.
        stack: List["PreprocessorBlock"] = []
        # Parallel to ``stack``: has a branch of that conditional been chosen?
        taken: List[bool] = []

        opening = (
            PreprocessorDirective.IF,
            PreprocessorDirective.IFDEF,
            PreprocessorDirective.IFNDEF,
        )
        continuing = (PreprocessorDirective.ELIF, PreprocessorDirective.ELSE)

        for index, token in enumerate(tokens):
            if token.type != TokenType.PREPROCESSOR:
                continue

            directive = self._lookup_directive(token.value)
            if directive is None:
                # Unknown directives must not open a conditional block.
                continue

            if directive in opening:
                condition = self._extract_condition(token.value, directive)
                block = PreprocessorBlock(
                    directive=directive,
                    condition=condition,
                    start_token=index,
                    end_token=-1,  # Set when the branch ends
                    is_active=self._should_activate_block(
                        directive, condition, stack
                    ),
                    children=[],
                    parent=stack[-1] if stack else None,
                )
                (stack[-1].children if stack else blocks).append(block)
                stack.append(block)
                taken.append(block.is_active)

            elif directive in continuing:
                if not stack:
                    continue  # Stray #elif / #else
                previous = stack[-1]
                condition = self._extract_condition(token.value, directive)
                is_active = self._should_activate_block(
                    directive, condition, stack, branch_taken=taken[-1]
                )
                previous.end_token = index - 1
                block = PreprocessorBlock(
                    directive=directive,
                    condition=condition,
                    start_token=index,
                    end_token=-1,
                    is_active=is_active,
                    children=[],
                    parent=previous.parent,
                )
                siblings = previous.parent.children if previous.parent else blocks
                siblings.append(block)
                stack[-1] = block
                taken[-1] = taken[-1] or is_active

            elif directive == PreprocessorDirective.ENDIF:
                if stack:
                    stack.pop().end_token = index
                    taken.pop()

            elif not stack or stack[-1].is_active:
                # Macro table updates only count in active regions.
                if directive == PreprocessorDirective.DEFINE:
                    macro_name, macro_value = self._parse_define(token.value)
                    if macro_name:
                        self.add_define(macro_name, macro_value)
                elif directive == PreprocessorDirective.UNDEF:
                    macro_name = self._parse_undef(token.value)
                    if macro_name:
                        self.add_undef(macro_name)

        # A conditional without #endif runs to the end of the token list.
        for block in stack:
            self.logger.warning(
                "Unterminated preprocessor conditional starting at token %d",
                block.start_token,
            )
            block.end_token = len(tokens) - 1

        return blocks

    def _lookup_directive(self, value: str) -> Optional["PreprocessorDirective"]:
        """Return the directive named by ``value``, or None if it is unknown."""
        match = self._KEYWORD_RE.match(value)
        if not match:
            return None
        try:
            return PreprocessorDirective(match.group(1))
        except ValueError:
            return None

    def _parse_directive(self, value: str) -> "PreprocessorDirective":
        """Return the directive named by a preprocessor token.

        Unknown directives map to ``PreprocessorDirective.IF``; this default
        is kept for callers that rely on it. ``parse_preprocessor_blocks``
        uses ``_lookup_directive`` instead so unknown directives are skipped.
        """
        directive = self._lookup_directive(value)
        return PreprocessorDirective.IF if directive is None else directive

    def _directive_body(self, value: str, keyword: str) -> Optional[str]:
        """Return the text after ``#keyword``, or None if ``value`` is not it.

        Backslash-newline continuations are joined, comments are removed and
        only the first remaining line is kept.
        """
        match = re.match(
            r"\s*#\s*" + re.escape(keyword) + r"\b(.*)", value, re.DOTALL
        )
        if not match:
            return None
        body = self._CONTINUATION_RE.sub(" ", match.group(1))
        body = self._COMMENT_RE.sub(" ", body)
        return body.split("\n", 1)[0].strip()

    def _extract_condition(
        self, value: str, directive: "PreprocessorDirective"
    ) -> str:
        """Extract the condition text of a conditional directive.

        Returns:
            The macro name for ``#ifdef`` / ``#ifndef``, the expression for
            ``#if`` / ``#elif``, and ``""`` for every other directive or when
            ``value`` does not match ``directive``.
        """
        if directive not in (
            PreprocessorDirective.IF,
            PreprocessorDirective.ELIF,
            PreprocessorDirective.IFDEF,
            PreprocessorDirective.IFNDEF,
        ):
            return ""

        body = self._directive_body(value, directive.value)
        if not body:
            return ""

        if directive in (PreprocessorDirective.IFDEF, PreprocessorDirective.IFNDEF):
            match = re.match(self._IDENTIFIER, body)
            return match.group(0) if match else ""
        return body

    def _parse_define(self, value: str) -> Tuple[str, str]:
        """Split a ``#define`` directive into ``(name, value)``.

        The value is the rest of the first line, stripped; it is ``""`` for a
        macro without replacement text. For a function-like macro the
        parameter list is part of the value.

        Returns:
            ``("", "")`` when ``value`` is not a ``#define`` directive.
        """
        match = self._DEFINE_RE.search(value.strip())
        if match:
            return match.group(1), match.group(2).strip()
        return "", ""

    def _parse_undef(self, value: str) -> str:
        """Return the macro name of an ``#undef`` directive, or ``""``."""
        match = self._UNDEF_RE.search(value.strip())
        return match.group(1) if match else ""

    def _should_activate_block(
        self,
        directive: "PreprocessorDirective",
        condition: str,
        stack: List["PreprocessorBlock"],
        branch_taken: Optional[bool] = None,
    ) -> bool:
        """Decide whether the branch introduced by ``directive`` is active.

        Args:
            directive: The directive introducing the branch.
            condition: Its condition text (macro name for ``#ifdef`` /
                ``#ifndef``).
            stack: Open branches, innermost last. For an opening directive
                the last entry is the enclosing branch; for ``#elif`` /
                ``#else`` it is the preceding branch of the same conditional.
            branch_taken: For ``#elif`` / ``#else``, whether an earlier
                branch of the same conditional was active. When omitted, only
                the immediately preceding branch is consulted.

        Returns:
            True when the branch's content should be kept.
        """
        if directive in (PreprocessorDirective.ELIF, PreprocessorDirective.ELSE):
            if not stack:
                return False
            current = stack[-1]
            enclosing = current.parent
            if enclosing is not None and not enclosing.is_active:
                return False
            if branch_taken is None:
                branch_taken = current.is_active
            if branch_taken:
                return False
            if directive == PreprocessorDirective.ELSE:
                return True
            return self.evaluate_condition(condition)

        # Nothing inside an inactive branch can be active.
        if stack and not stack[-1].is_active:
            return False

        if directive == PreprocessorDirective.IFDEF:
            return self.is_defined(condition)
        if directive == PreprocessorDirective.IFNDEF:
            return not self.is_defined(condition)
        if directive == PreprocessorDirective.IF:
            return self.evaluate_condition(condition)
        return False

    # ------------------------------------------------------------------
    # Token filtering
    # ------------------------------------------------------------------

    def filter_tokens(self, tokens: List["Token"]) -> List["Token"]:
        """Return the tokens that survive conditional compilation.

        Preprocessor directive tokens themselves are always removed; every
        other token is kept unless it lies in an inactive branch.
        """
        blocks = self.parse_preprocessor_blocks(tokens)
        return [
            token
            for index, token in enumerate(tokens)
            if self._is_token_in_active_block(index, blocks)
            and token.type != TokenType.PREPROCESSOR
        ]

    def _is_token_in_active_block(
        self, token_index: int, blocks: List["PreprocessorBlock"]
    ) -> bool:
        """Return False when ``token_index`` lies in an inactive branch.

        The innermost branch containing the token decides; a token outside
        every branch is active.
        """
        for block in blocks:
            if self._is_token_in_block(token_index, block):
                if not block.is_active:
                    return False
                return self._is_token_in_active_block(token_index, block.children)
        return True  # Not inside any of these blocks

    def _is_token_in_block(
        self, token_index: int, block: "PreprocessorBlock"
    ) -> bool:
        """Return True when ``token_index`` lies within ``block``'s token range."""
        return block.start_token <= token_index <= block.end_token

457 

458 

class PreprocessorManager:
    """High-level interface for preprocessor management.

    Owns a ``PreprocessorEvaluator`` and exposes the two operations callers
    need: seeding macros and filtering a token list.
    """

    def __init__(self):
        self.evaluator = PreprocessorEvaluator()
        self.logger = logging.getLogger(__name__)

    def process_file(
        self, tokens: List[Token], defines: Optional[Dict[str, str]] = None
    ) -> List[Token]:
        """Filter a file's tokens through the preprocessor.

        Args:
            tokens: The file's tokens.
            defines: Optional macros (name -> value) to register with the
                evaluator before filtering. They stay registered afterwards.

        Returns:
            The tokens kept by ``PreprocessorEvaluator.filter_tokens``.
        """
        if defines:
            for name, value in defines.items():
                self.evaluator.add_define(name, value)

        filtered_tokens = self.evaluator.filter_tokens(tokens)

        # Lazy %-formatting: the message is only built when DEBUG is enabled.
        self.logger.debug(
            "Preprocessor: %d tokens -> %d tokens",
            len(tokens),
            len(filtered_tokens),
        )
        return filtered_tokens

    def add_defines_from_content(self, tokens: List[Token]):
        """Register every ``#define`` found in ``tokens`` with the evaluator.

        Only tokens of type ``TokenType.PREPROCESSOR`` whose text starts with
        ``#define`` (leading whitespace allowed) are considered; directives
        that yield no macro name are skipped.
        """
        for token in tokens:
            if token.type != TokenType.PREPROCESSOR:
                continue
            # _parse_define strips its input, so accept leading whitespace too.
            if not token.value.lstrip().startswith("#define"):
                continue
            name, value = self.evaluator._parse_define(token.value)
            if name:
                self.evaluator.add_define(name, value)
                self.logger.debug("Preprocessor: Added define %s = %s", name, value)