cprover
Loading...
Searching...
No Matches
c_wrangler.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: C Wrangler
4
5Author: Daniel Kroening, dkr@amazon.com
6
7\*******************************************************************/
8
11
12#include "c_wrangler.h"
13
14#include "c_defines.h"
15#include "ctokenit.h"
16#include "mini_c_parser.h"
17
18#include <util/cprover_prefix.h>
20#include <util/file_util.h>
21#include <util/json.h>
22#include <util/optional.h>
23#include <util/prefix.h>
24#include <util/run.h>
25#include <util/string_utils.h>
26#include <util/suffix.h>
27#include <util/tempdir.h>
28
29#include <fstream>
30#include <iostream>
31#include <map>
32#include <regex>
33#include <sstream>
34#include <unordered_map>
35
37{
38 // sources and preprocessing
39 std::vector<std::string> source_files;
40 std::vector<std::string> includes;
41 std::vector<std::string> defines;
42
43 // transformations
45 {
46 std::string clause;
47 std::string content;
48 contract_clauset(std::string _clause, std::string _content)
49 : clause(std::move(_clause)), content(std::move(_content))
50 {
51 }
52 };
53
55 {
56 std::string loop_type;
57 std::string identifier;
58 std::string content;
60 std::string _loop_type,
61 std::string _identifier,
62 std::string _content)
63 : loop_type(std::move(_loop_type)),
64 identifier(std::move(_identifier)),
65 content(std::move(_content))
66 {
67 }
68 };
69
71 {
72 std::string identifier;
73 std::string content;
74 assertiont(std::string _identifier, std::string _content)
75 : identifier(std::move(_identifier)), content(std::move(_content))
76 {
77 }
78 };
79
80 struct functiont
81 {
82 // should be variant to preserve ordering
83 std::vector<contract_clauset> contract;
84 std::vector<loop_invariantt> loop_invariants;
85 std::vector<assertiont> assertions;
87 bool remove_static = false;
88 };
89
90 using functionst = std::list<std::pair<std::regex, functiont>>;
92
93 struct objectt
94 {
95 bool remove_static = false;
96 };
97
98 using objectst = std::list<std::pair<std::regex, objectt>>;
100
101 // output
102 std::string output;
103
104 void configure_sources(const jsont &);
105 void configure_functions(const jsont &);
106 void configure_objects(const jsont &);
107 void configure_output(const jsont &);
108};
109
111{
112 auto sources = config["sources"];
113
114 if(!sources.is_null())
115 {
116 if(!sources.is_array())
117 throw deserialization_exceptiont("sources entry must be sequence");
118
119 for(const auto &source : to_json_array(sources))
120 {
121 if(!source.is_string())
122 throw deserialization_exceptiont("source must be string");
123
124 this->source_files.push_back(source.value);
125 }
126 }
127
128 auto includes = config["includes"];
129
130 if(!includes.is_null())
131 {
132 if(!includes.is_array())
133 throw deserialization_exceptiont("includes entry must be sequence");
134
135 for(const auto &include : to_json_array(includes))
136 {
137 if(!include.is_string())
138 throw deserialization_exceptiont("include must be string");
139
140 this->includes.push_back(include.value);
141 }
142 }
143
144 auto defines = config["defines"];
145
146 if(!defines.is_null())
147 {
148 if(!defines.is_array())
149 throw deserialization_exceptiont("defines entry must be sequence");
150
151 for(const auto &define : to_json_array(defines))
152 {
153 if(!define.is_string())
154 throw deserialization_exceptiont("define must be string");
155
156 this->defines.push_back(define.value);
157 }
158 }
159}
160
162{
163 auto functions = config["functions"];
164
165 if(functions.is_null())
166 return;
167
168 if(!functions.is_array())
169 throw deserialization_exceptiont("functions entry must be sequence");
170
171 for(const auto &function : to_json_array(functions))
172 {
173 if(!function.is_object())
174 throw deserialization_exceptiont("function entry must be object");
175
176 for(const auto &function_entry : to_json_object(function))
177 {
178 const auto function_name = function_entry.first;
179 const auto &items = function_entry.second;
180
181 if(!items.is_array())
182 throw deserialization_exceptiont("function entry must be sequence");
183
184 this->functions.emplace_back(function_name, functiont{});
185 functiont &function_config = this->functions.back().second;
186
187 for(const auto &function_item : to_json_array(items))
188 {
189 // These need to start with "ensures", "requires", "assigns",
190 // "invariant", "assert", "stub", "remove"
191 if(!function_item.is_string())
192 throw deserialization_exceptiont("function entry must be string");
193
194 auto item_string = function_item.value;
195 auto split = split_string(item_string, ' ');
196 if(split.empty())
197 continue;
198
199 if(
200 split[0] == "ensures" || split[0] == "requires" ||
201 split[0] == "assigns")
202 {
203 std::ostringstream rest;
204 join_strings(rest, split.begin() + 1, split.end(), ' ');
205
206 function_config.contract.emplace_back(split[0], rest.str());
207 }
208 else if(split[0] == "assert" && split.size() >= 3)
209 {
210 std::ostringstream rest;
211 join_strings(rest, split.begin() + 2, split.end(), ' ');
212
213 function_config.assertions.emplace_back(split[1], rest.str());
214 }
215 else if(
216 (split[0] == "for" && split.size() >= 3 && split[2] == "invariant") ||
217 (split[0] == "while" && split.size() >= 3 && split[2] == "invariant"))
218 {
219 std::ostringstream rest;
220 join_strings(rest, split.begin() + 3, split.end(), ' ');
221
222 function_config.loop_invariants.emplace_back(
223 split[0], split[1], rest.str());
224 }
225 else if(split[0] == "stub")
226 {
227 std::ostringstream rest;
228 join_strings(rest, split.begin() + 1, split.end(), ' ');
229
230 function_config.stub = rest.str();
231 }
232 else if(split[0] == "remove")
233 {
234 if(split.size() == 1)
235 throw deserialization_exceptiont("unexpected remove entry");
236
237 if(split[1] == "static")
238 function_config.remove_static = true;
239 else
241 "unexpected remove entry " + split[1]);
242 }
243 else
245 "unexpected function entry " + split[0]);
246 }
247 }
248 }
249}
250
252{
253 auto objects = config["objects"];
254
255 if(objects.is_null())
256 return;
257
258 if(!objects.is_array())
259 throw deserialization_exceptiont("objects entry must be sequence");
260
261 for(const auto &object : to_json_array(objects))
262 {
263 if(!object.is_object())
264 throw deserialization_exceptiont("object entry must be object");
265
266 for(const auto &object_entry : to_json_object(object))
267 {
268 const auto &object_name = object_entry.first;
269 const auto &items = object_entry.second;
270
271 if(!items.is_array())
272 throw deserialization_exceptiont("object entry must be sequence");
273
274 this->objects.emplace_back(object_name, objectt{});
275 objectt &object_config = this->objects.back().second;
276
277 for(const auto &object_item : to_json_array(items))
278 {
279 // Needs to start with "remove"
280 if(!object_item.is_string())
281 throw deserialization_exceptiont("object entry must be string");
282
283 auto item_string = object_item.value;
284 auto split = split_string(item_string, ' ');
285 if(split.empty())
286 continue;
287
288 if(split[0] == "remove")
289 {
290 if(split.size() == 1)
291 throw deserialization_exceptiont("unexpected remove entry");
292
293 if(split[1] == "static")
294 object_config.remove_static = true;
295 else
297 "unexpected remove entry " + split[1]);
298 }
299 else
301 "unexpected object entry " + split[0]);
302 }
303 }
304 }
305}
306
308{
309 auto output = config["output"];
310
311 if(output.is_null())
312 return;
313
314 if(!output.is_string())
315 throw deserialization_exceptiont("output entry must be string");
316
317 this->output = output.value;
318}
319
320static std::string
321preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
322{
323 std::vector<std::string> argv = {"cc", "-E", source_file};
324
325 for(const auto &include : c_wrangler.includes)
326 {
327 argv.push_back("-I");
328 argv.push_back(include);
329 }
330
331 for(const auto &define : c_wrangler.defines)
332 argv.push_back(std::string("-D") + define);
333
334 std::ostringstream result;
335
336 auto run_result = run("cc", argv, "", result, "");
337 if(run_result != 0)
338 throw system_exceptiont("preprocessing " + source_file + " has failed");
339
340 return result.str();
341}
342
343static c_definest
344get_defines(const std::string &source_file, const c_wranglert &config)
345{
346 std::vector<std::string> argv = {"cc", "-E", "-dM", source_file};
347
348 for(const auto &include : config.includes)
349 {
350 argv.push_back("-I");
351 argv.push_back(include);
352 }
353
354 std::ostringstream result;
355
356 auto run_result = run("cc", argv, "", result, "");
357 if(run_result != 0)
358 throw system_exceptiont("preprocessing " + source_file + " has failed");
359
360 c_definest defines;
361 defines.parse(result.str());
362 return defines;
363}
364
365static void mangle_function(
366 const c_declarationt &declaration,
367 const c_definest &defines,
368 const c_wranglert::functiont &function_config,
369 std::ostream &out)
370{
371 if(function_config.stub.has_value())
372 {
373 // replace by stub
374 out << function_config.stub.value();
375 }
376 else
377 {
378 if(function_config.remove_static)
379 {
380 for(auto &t : declaration.pre_declarator)
381 {
382 if(t.text == "static")
383 {
384 // we replace by white space
385 out << std::string(6, ' ');
386 }
387 else
388 out << t.text;
389 }
390 }
391 else
392 {
393 for(auto &t : declaration.pre_declarator)
394 out << t.text;
395 }
396
397 for(auto &t : declaration.declarator)
398 out << t.text;
399 for(auto &t : declaration.post_declarator)
400 out << t.text;
401
402 for(const auto &entry : function_config.contract)
403 out << ' ' << CPROVER_PREFIX << entry.clause << '('
404 << defines(entry.content) << ')';
405
406 std::map<std::string, std::string> loop_invariants;
407
408 for(const auto &entry : function_config.loop_invariants)
409 loop_invariants[entry.loop_type + entry.identifier] = entry.content;
410
411 if(loop_invariants.empty())
412 {
413 for(auto &t : declaration.initializer)
414 out << t.text;
415 }
416 else
417 {
418 std::size_t for_count = 0, while_count = 0;
419 ctokenitt t(declaration.initializer);
420
421 while(t)
422 {
423 const auto &token = *(t++);
424 out << token.text;
425
426 if(token == "while")
427 {
428 while_count++;
429 const auto &invariant =
430 loop_invariants["while" + std::to_string(while_count)];
431
432 if(!invariant.empty())
433 {
434 auto t_end = match_bracket(t, '(', ')');
435 for(; t != t_end; t++)
436 out << t->text;
437 out << ' ' << CPROVER_PREFIX << "loop_invariant("
438 << defines(invariant) << ')';
439 }
440 }
441 else if(token == "for")
442 {
443 for_count++;
444 const auto &invariant =
445 loop_invariants["for" + std::to_string(for_count)];
446
447 if(!invariant.empty())
448 {
449 auto t_end = match_bracket(t, '(', ')');
450 for(; t != t_end; t++)
451 out << t->text;
452 out << ' ' << CPROVER_PREFIX << "invariant(" << defines(invariant)
453 << ')';
454 }
455 }
456 }
457 }
458 }
459}
460
461static void mangle_object(
462 const c_declarationt &declaration,
463 const c_definest &defines,
464 const c_wranglert::objectt &object_config,
465 std::ostream &out)
466{
467 if(object_config.remove_static)
468 {
469 for(auto &t : declaration.pre_declarator)
470 {
471 if(t.text == "static")
472 {
473 // we replace by white space
474 out << std::string(6, ' ');
475 }
476 else
477 out << t.text;
478 }
479 }
480 else
481 {
482 for(auto &t : declaration.pre_declarator)
483 out << t.text;
484 }
485
486 for(auto &t : declaration.declarator)
487 out << t.text;
488 for(auto &t : declaration.post_declarator)
489 out << t.text;
490 for(auto &t : declaration.initializer)
491 out << t.text;
492}
493
494static void mangle(
495 const c_declarationt &declaration,
496 const c_definest &defines,
497 const c_wranglert &config,
498 std::ostream &out)
499{
500 auto name_opt = declaration.declared_identifier();
501 if(
502 declaration.is_function() && name_opt.has_value() && declaration.has_body())
503 {
504 for(const auto &entry : config.functions)
505 {
506 if(std::regex_match(name_opt->text, entry.first))
507 {
508 // we are to modify this function
509 mangle_function(declaration, defines, entry.second, out);
510
511 return;
512 }
513 }
514 }
515 else if(!declaration.is_function() && name_opt.has_value())
516 {
517 for(const auto &entry : config.objects)
518 {
519 if(std::regex_match(name_opt->text, entry.first))
520 {
521 // we are to modify this function
522 mangle_object(declaration, defines, entry.second, out);
523
524 return;
525 }
526 }
527 }
528
529 // output
530 out << declaration;
531}
532
533static std::string mangle(
534 const std::string &in,
535 const c_definest &defines,
536 const c_wranglert &config)
537{
538 std::ostringstream out;
539 std::istringstream in_str(in);
540
541 auto parsed = parse_c(in_str);
542
543 for(const auto &declaration : parsed)
544 mangle(declaration, defines, config, out);
545
546 return out.str();
547}
548
550{
552
553 c_wrangler.configure_sources(config);
554 c_wrangler.configure_functions(config);
555 c_wrangler.configure_objects(config);
556 c_wrangler.configure_output(config);
557
558 for(auto &source_file : c_wrangler.source_files)
559 {
560 // first preprocess
561 auto preprocessed = preprocess(source_file, c_wrangler);
562
563 // get the defines
564 auto defines = get_defines(source_file, c_wrangler);
565
566 // now mangle
567 auto mangled = mangle(preprocessed, defines, c_wrangler);
568
569 // now output
570 if(c_wrangler.output == "stdout" || c_wrangler.output.empty())
571 {
572 std::cout << mangled;
573 }
574 else
575 {
576 std::ofstream out(c_wrangler.output);
577 out << mangled;
578 }
579 }
580}
c_defines
static void mangle_function(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::functiont &function_config, std::ostream &out)
Definition: c_wrangler.cpp:365
static c_definest get_defines(const std::string &source_file, const c_wranglert &config)
Definition: c_wrangler.cpp:344
static void mangle_object(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::objectt &object_config, std::ostream &out)
Definition: c_wrangler.cpp:461
void c_wrangler(const jsont &config)
Definition: c_wrangler.cpp:549
static std::string preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
Definition: c_wrangler.cpp:321
static void mangle(const c_declarationt &declaration, const c_definest &defines, const c_wranglert &config, std::ostream &out)
Definition: c_wrangler.cpp:494
C Wrangler.
This class maintains a representation of one assignment to the preprocessor macros in a C program.
Definition: c_defines.h:24
void parse(const std::string &)
Definition: c_defines.cpp:21
std::string text
Definition: ctoken.h:40
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: json.h:27
Thrown when some external system fails unexpectedly.
configt config
Definition: config.cpp:25
#define CPROVER_PREFIX
ctokenitt match_bracket(ctokenitt t, char open, char close)
Definition: ctokenit.cpp:33
ctokenit
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
STL namespace.
nonstd::optional< T > optionalt
Definition: optional.h:35
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
Stream & join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter, TransformFunc &&transform_func)
Prints items to an stream, separated by a constant delimiter.
Definition: string_utils.h:62
bool has_body() const
bool is_function() const
tokenst post_declarator
Definition: mini_c_parser.h:29
tokenst initializer
Definition: mini_c_parser.h:30
optionalt< ctokent > declared_identifier() const
tokenst declarator
Definition: mini_c_parser.h:28
tokenst pre_declarator
Definition: mini_c_parser.h:27
assertiont(std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:74
contract_clauset(std::string _clause, std::string _content)
Definition: c_wrangler.cpp:48
std::vector< loop_invariantt > loop_invariants
Definition: c_wrangler.cpp:84
std::vector< assertiont > assertions
Definition: c_wrangler.cpp:85
std::vector< contract_clauset > contract
Definition: c_wrangler.cpp:83
optionalt< std::string > stub
Definition: c_wrangler.cpp:86
loop_invariantt(std::string _loop_type, std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:59
void configure_output(const jsont &)
Definition: c_wrangler.cpp:307
void configure_objects(const jsont &)
Definition: c_wrangler.cpp:251
objectst objects
Definition: c_wrangler.cpp:99
std::vector< std::string > source_files
Definition: c_wrangler.cpp:39
std::string output
Definition: c_wrangler.cpp:102
std::list< std::pair< std::regex, functiont > > functionst
Definition: c_wrangler.cpp:90
std::list< std::pair< std::regex, objectt > > objectst
Definition: c_wrangler.cpp:98
functionst functions
Definition: c_wrangler.cpp:91
void configure_sources(const jsont &)
Definition: c_wrangler.cpp:110
std::vector< std::string > includes
Definition: c_wrangler.cpp:40
std::vector< std::string > defines
Definition: c_wrangler.cpp:41
void configure_functions(const jsont &)
Definition: c_wrangler.cpp:161