cprover
compile.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Compile and link source and object files.
4 
5 Author: CM Wintersteiger
6 
7 Date: June 2006
8 
9 \*******************************************************************/
10 
13 
14 #include "compile.h"
15 
16 #include <cstring>
17 #include <fstream>
18 #include <iostream>
19 
20 #include <util/cmdline.h>
21 #include <util/config.h>
22 #include <util/file_util.h>
23 #include <util/get_base_name.h>
24 #include <util/suffix.h>
25 #include <util/tempdir.h>
26 #include <util/unicode.h>
27 #include <util/version.h>
28 
29 #include <ansi-c/ansi_c_language.h>
31 
36 
37 #include <langapi/mode.h>
38 
40 
41 #define DOTGRAPHSETTINGS "color=black;" \
42  "orientation=portrait;" \
43  "fontsize=20;"\
44  "compound=true;"\
45  "size=\"30,40\";"\
46  "ratio=compress;"
47 
48 // the following are for chdir
49 
50 #if defined(__linux__) || \
51  defined(__FreeBSD_kernel__) || \
52  defined(__GNU__) || \
53  defined(__unix__) || \
54  defined(__CYGWIN__) || \
55  defined(__MACH__)
56 #include <unistd.h>
57 #endif
58 
59 #ifdef _WIN32
60 #include <direct.h>
61 #include <windows.h>
62 #define chdir _chdir
63 #define popen _popen
64 #define pclose _pclose
65 #endif
66 
71 {
73 
75 
76  // Parse command line for source and object file names
77  for(std::size_t i=0; i<_cmdline.args.size(); i++)
79  return true;
80 
81  for(std::list<std::string>::const_iterator it = libraries.begin();
82  it!=libraries.end();
83  it++)
84  {
85  if(!find_library(*it))
86  // GCC is going to complain if this doesn't exist
87  debug() << "Library not found: " << *it << " (ignoring)" << eom;
88  }
89 
90  statistics() << "No. of source files: " << source_files.size() << eom;
91  statistics() << "No. of object files: " << object_files.size() << eom;
92 
93  // Work through the given source files
94 
95  if(source_files.empty() && object_files.empty())
96  {
97  error() << "no input files" << eom;
98  return true;
99  }
100 
101  if(mode==LINK_LIBRARY && !source_files.empty())
102  {
103  error() << "cannot link source files" << eom;
104  return true;
105  }
106 
107  if(mode==PREPROCESS_ONLY && !object_files.empty())
108  {
109  error() << "cannot preprocess object files" << eom;
110  return true;
111  }
112 
113  const unsigned warnings_before=
115 
116  if(!source_files.empty())
117  if(compile())
118  return true;
119 
120  if(mode==LINK_LIBRARY ||
121  mode==COMPILE_LINK ||
123  {
124  if(link())
125  return true;
126  }
127 
128  return
131  warnings_before;
132 }
133 
134 enum class file_typet
135 {
137  UNKNOWN,
138  SOURCE_FILE,
140  THIN_ARCHIVE,
141  GOTO_BINARY,
142  ELF_OBJECT
143 };
144 
145 static file_typet detect_file_type(const std::string &file_name)
146 {
147  // first of all, try to open the file
148  std::ifstream in(file_name);
149  if(!in)
151 
152  const std::string::size_type r = file_name.rfind('.');
153 
154  const std::string ext =
155  r == std::string::npos ? "" : file_name.substr(r + 1, file_name.length());
156 
157  if(
158  ext == "c" || ext == "cc" || ext == "cp" || ext == "cpp" || ext == "CPP" ||
159  ext == "c++" || ext == "C" || ext == "i" || ext == "ii" || ext == "class" ||
160  ext == "jar" || ext == "jsil")
161  {
163  }
164 
165  char hdr[8];
166  in.get(hdr, 8);
167  if((ext == "a" || ext == "o") && strncmp(hdr, "!<thin>", 8) == 0)
169 
170  if(ext == "a")
172 
173  if(is_goto_binary(file_name))
175 
176  if(hdr[0] == 0x7f && memcmp(hdr + 1, "ELF", 3) == 0)
177  return file_typet::ELF_OBJECT;
178 
179  return file_typet::UNKNOWN;
180 }
181 
184 bool compilet::add_input_file(const std::string &file_name)
185 {
186  switch(detect_file_type(file_name))
187  {
189  warning() << "failed to open file `" << file_name
190  << "': " << std::strerror(errno) << eom;
191  return warning_is_fatal; // generously ignore unless -Werror
192 
193  case file_typet::UNKNOWN:
194  // unknown extension, not a goto binary, will silently ignore
195  debug() << "unknown file type in `" << file_name << "'" << eom;
196  return false;
197 
199  // ELF file without goto-cc section, silently ignore
200  debug() << "ELF object without goto-cc section: `" << file_name << "'"
201  << eom;
202  return false;
203 
205  source_files.push_back(file_name);
206  return false;
207 
209  return add_files_from_archive(file_name, false);
210 
212  return add_files_from_archive(file_name, true);
213 
215  object_files.push_back(file_name);
216  return false;
217  }
218 
219  UNREACHABLE;
220 }
221 
225  const std::string &file_name,
226  bool thin_archive)
227 {
228  std::stringstream cmd;
229  FILE *stream;
230 
231  std::string tstr = working_directory;
232 
233  if(!thin_archive)
234  {
235  tstr = get_temporary_directory("goto-cc.XXXXXX");
236 
237  if(tstr=="")
238  {
239  error() << "Cannot create temporary directory" << eom;
240  return true;
241  }
242 
243  tmp_dirs.push_back(tstr);
244  if(chdir(tmp_dirs.back().c_str())!=0)
245  {
246  error() << "Cannot switch to temporary directory" << eom;
247  return true;
248  }
249 
250  // unpack now
251  cmd << "ar x " << concat_dir_file(working_directory, file_name);
252 
253  stream=popen(cmd.str().c_str(), "r");
254  pclose(stream);
255 
256  cmd.clear();
257  cmd.str("");
258  }
259 
260  // add the files from "ar t"
261  cmd << "ar t " << concat_dir_file(working_directory, file_name);
262 
263  stream = popen(cmd.str().c_str(), "r");
264 
265  if(stream != nullptr)
266  {
267  std::string line;
268  int ch; // fgetc returns an int, not char
269  while((ch = fgetc(stream)) != EOF)
270  {
271  if(ch != '\n')
272  {
273  line += static_cast<char>(ch);
274  }
275  else
276  {
277  std::string t = concat_dir_file(tstr, line);
278 
279  if(is_goto_binary(t))
280  object_files.push_back(t);
281  else
282  debug() << "Object file is not a goto binary: " << line << eom;
283 
284  line = "";
285  }
286  }
287 
288  pclose(stream);
289  }
290 
291  if(!thin_archive && chdir(working_directory.c_str()) != 0)
292  error() << "Could not change back to working directory" << eom;
293 
294  return false;
295 }
296 
300 bool compilet::find_library(const std::string &name)
301 {
302  std::string tmp;
303 
304  for(std::list<std::string>::const_iterator
305  it=library_paths.begin();
306  it!=library_paths.end();
307  it++)
308  {
309  #ifdef _WIN32
310  tmp = *it + "\\lib";
311  #else
312  tmp = *it + "/lib";
313  #endif
314 
315  std::ifstream in(tmp+name+".a");
316 
317  if(in.is_open())
318  return !add_input_file(tmp+name+".a");
319  else
320  {
321  std::string libname=tmp+name+".so";
322 
323  switch(detect_file_type(libname))
324  {
326  return !add_input_file(libname);
327 
329  warning() << "Warning: Cannot read ELF library " << libname << eom;
330  return warning_is_fatal;
331 
332  default:
333  break;
334  }
335  }
336  }
337 
338  return false;
339 }
340 
344 {
345  // "compile" hitherto uncompiled functions
346  statistics() << "Compiling functions" << eom;
348 
349  // parse object files
350  for(const auto &file_name : object_files)
351  {
352  if(read_object_and_link(file_name, symbol_table,
354  return true;
355  }
356 
357  // produce entry point?
358 
360  {
361  // new symbols may have been added to a previously linked file
362  // make sure a new entry point is created that contains all
363  // static initializers
365 
368 
370  return true;
371 
372  // entry_point may (should) add some more functions.
374  }
375 
378  return true;
379 
381 }
382 
387 {
388  while(!source_files.empty())
389  {
390  std::string file_name=source_files.front();
391  source_files.pop_front();
392 
393  // Visual Studio always prints the name of the file it's doing
394  // onto stdout. The name of the directory is stripped.
395  if(echo_file_name)
396  std::cout << get_base_name(file_name, false) << '\n' << std::flush;
397 
398  bool r=parse_source(file_name); // don't break the program!
399 
400  if(r)
401  {
402  const std::string &debug_outfile=
403  cmdline.get_value("print-rejected-preprocessed-source");
404  if(!debug_outfile.empty())
405  {
406  std::ifstream in(file_name, std::ios::binary);
407  std::ofstream out(debug_outfile, std::ios::binary);
408  out << in.rdbuf();
409  warning() << "Failed sources in " << debug_outfile << eom;
410  }
411 
412  return true; // parser/typecheck error
413  }
414 
416  {
417  // output an object file for every source file
418 
419  // "compile" functions
421 
422  std::string cfn;
423 
424  if(output_file_object=="")
425  {
426  const std::string file_name_with_obj_ext =
427  get_base_name(file_name, true) + "." + object_file_extension;
428 
429  if(!output_directory_object.empty())
430  cfn = concat_dir_file(output_directory_object, file_name_with_obj_ext);
431  else
432  cfn = file_name_with_obj_ext;
433  }
434  else
435  cfn = output_file_object;
436 
438  return true;
439 
441  return true;
442 
443  symbol_table.clear(); // clean symbol table for next source file.
445  }
446  }
447 
448  return false;
449 }
450 
453 bool compilet::parse(const std::string &file_name)
454 {
455  if(file_name=="-")
456  return parse_stdin();
457 
458  #ifdef _MSC_VER
459  std::ifstream infile(widen(file_name));
460  #else
461  std::ifstream infile(file_name);
462  #endif
463 
464  if(!infile)
465  {
466  error() << "failed to open input file `" << file_name << "'" << eom;
467  return true;
468  }
469 
470  std::unique_ptr<languaget> languagep;
471 
472  // Using '-x', the type of a file can be overridden;
473  // otherwise, it's guessed from the extension.
474 
475  if(override_language!="")
476  {
477  if(override_language=="c++" || override_language=="c++-header")
478  languagep = get_language_from_mode(ID_cpp);
479  else
480  languagep = get_language_from_mode(ID_C);
481  }
482  else
483  languagep=get_language_from_filename(file_name);
484 
485  if(languagep==nullptr)
486  {
487  error() << "failed to figure out type of file `" << file_name << "'" << eom;
488  return true;
489  }
490 
492 
493  language_filet &lf=language_files.add_file(file_name);
494  lf.language=std::move(languagep);
495 
496  if(mode==PREPROCESS_ONLY)
497  {
498  statistics() << "Preprocessing: " << file_name << eom;
499 
500  std::ostream *os = &std::cout;
501  std::ofstream ofs;
502 
503  if(cmdline.isset('o'))
504  {
505  ofs.open(cmdline.get_value('o'));
506  os = &ofs;
507 
508  if(!ofs.is_open())
509  {
510  error() << "failed to open output file `"
511  << cmdline.get_value('o') << "'" << eom;
512  return true;
513  }
514  }
515 
516  lf.language->preprocess(infile, file_name, *os);
517  }
518  else
519  {
520  statistics() << "Parsing: " << file_name << eom;
521 
522  if(lf.language->parse(infile, file_name))
523  {
525  error() << "PARSING ERROR" << eom;
526  return true;
527  }
528  }
529 
530  lf.get_modules();
531  return false;
532 }
533 
537 {
538  ansi_c_languaget language;
539 
541 
542  statistics() << "Parsing: (stdin)" << eom;
543 
544  if(mode==PREPROCESS_ONLY)
545  {
546  std::ostream *os = &std::cout;
547  std::ofstream ofs;
548 
549  if(cmdline.isset('o'))
550  {
551  ofs.open(cmdline.get_value('o'));
552  os = &ofs;
553 
554  if(!ofs.is_open())
555  {
556  error() << "failed to open output file `"
557  << cmdline.get_value('o') << "'" << eom;
558  return true;
559  }
560  }
561 
562  language.preprocess(std::cin, "", *os);
563  }
564  else
565  {
566  if(language.parse(std::cin, ""))
567  {
569  error() << "PARSING ERROR" << eom;
570  return true;
571  }
572  }
573 
574  return false;
575 }
576 
582  const std::string &file_name,
583  const symbol_tablet &lsymbol_table,
584  goto_functionst &functions)
585 {
586  return write_bin_object_file(file_name, lsymbol_table, functions);
587 }
588 
594  const std::string &file_name,
595  const symbol_tablet &lsymbol_table,
596  goto_functionst &functions)
597 {
598  statistics() << "Writing binary format object `"
599  << file_name << "'" << eom;
600 
601  // symbols
602  statistics() << "Symbols in table: "
603  << lsymbol_table.symbols.size() << eom;
604 
605  std::ofstream outfile(file_name, std::ios::binary);
606 
607  if(!outfile.is_open())
608  {
609  error() << "Error opening file `" << file_name << "'" << eom;
610  return true;
611  }
612 
613  if(write_goto_binary(outfile, lsymbol_table, functions))
614  return true;
615 
616  unsigned cnt=function_body_count(functions);
617 
618  statistics() << "Functions: " << functions.function_map.size()
619  << "; " << cnt << " have a body." << eom;
620 
621  outfile.close();
622  wrote_object=true;
623 
624  return false;
625 }
626 
629 bool compilet::parse_source(const std::string &file_name)
630 {
631  if(parse(file_name))
632  return true;
633 
634  if(typecheck()) // we just want to typecheck this one file here
635  return true;
636 
637  if((has_suffix(file_name, ".class") ||
638  has_suffix(file_name, ".jar")) &&
639  final())
640  return true;
641 
642  // so we remove it from the list afterwards
643  language_files.remove_file(file_name);
644  return false;
645 }
646 
649 compilet::compilet(cmdlinet &_cmdline, ui_message_handlert &mh, bool Werror):
650  language_uit(_cmdline, mh),
651  ns(symbol_table),
652  cmdline(_cmdline),
653  warning_is_fatal(Werror)
654 {
656  echo_file_name=false;
657  wrote_object=false;
659 }
660 
664 {
665  // clean up temp dirs
666 
667  for(std::list<std::string>::const_iterator it = tmp_dirs.begin();
668  it!=tmp_dirs.end();
669  it++)
670  delete_directory(*it);
671 }
672 
673 unsigned compilet::function_body_count(const goto_functionst &functions) const
674 {
675  int fbs=0;
676 
677  for(goto_functionst::function_mapt::const_iterator it=
678  functions.function_map.begin();
679  it != functions.function_map.end();
680  it++)
681  if(it->second.body_available())
682  fbs++;
683 
684  return fbs;
685 }
686 
688 {
689  config.ansi_c.defines.push_back(
690  std::string("__GOTO_CC_VERSION__=") + CBMC_VERSION);
691 }
692 
694 {
696 
697  // the compilation may add symbols!
698 
700 
701  while(before!=symbol_table.symbols.size())
702  {
703  before=symbol_table.symbols.size();
704 
705  typedef std::set<irep_idt> symbols_sett;
706  symbols_sett symbols;
707 
708  for(const auto &named_symbol : symbol_table.symbols)
709  symbols.insert(named_symbol.first);
710 
711  // the symbol table iterators aren't stable
712  for(symbols_sett::const_iterator
713  it=symbols.begin();
714  it!=symbols.end();
715  ++it)
716  {
717  symbol_tablet::symbolst::const_iterator s_it=
718  symbol_table.symbols.find(*it);
719  assert(s_it!=symbol_table.symbols.end());
720 
721  if(s_it->second.type.id()==ID_code &&
722  !s_it->second.is_macro &&
723  !s_it->second.is_type &&
724  s_it->second.value.id()!="compiled" &&
725  s_it->second.value.is_not_nil())
726  {
727  debug() << "Compiling " << s_it->first << eom;
728  converter.convert_function(s_it->first, dest.function_map[s_it->first]);
729  symbol_table.get_writeable_ref(*it).value=exprt("compiled");
730  }
731  }
732  }
733 }
734 
736 {
737  for(const auto &pair : symbol_table.symbols)
738  {
739  const irep_idt &name=pair.second.name;
740  const typet &new_type=pair.second.type;
741  if(!(has_prefix(id2string(name), CPROVER_PREFIX) && new_type.id()==ID_code))
742  continue;
743 
744  bool inserted;
745  std::map<irep_idt, symbolt>::iterator old;
746  std::tie(old, inserted)=written_macros.insert({name, pair.second});
747 
748  if(!inserted && old->second.type!=new_type)
749  {
750  error() << "Incompatible CPROVER macro symbol types:" << eom
751  << old->second.type.pretty() << "(at " << old->second.location
752  << ")" << eom << "and" << eom << new_type.pretty()
753  << "(at " << pair.second.location << ")" << eom;
754  return true;
755  }
756  }
757  return false;
758 }
std::string concat_dir_file(const std::string &directory, const std::string &file_name)
Definition: file_util.cpp:134
The type of an expression.
Definition: type.h:22
symbol_tablet symbol_table
Definition: language_ui.h:25
bool add_input_file(const std::string &)
puts input file names into a list and does preprocessing for libraries.
Definition: compile.cpp:184
struct configt::ansi_ct ansi_c
std::string output_file_executable
Definition: compile.h:48
compilet(cmdlinet &_cmdline, ui_message_handlert &mh, bool Werror)
constructor
Definition: compile.cpp:649
static int8_t r
Definition: irep_hash.h:59
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
Globally accessible architectural configuration.
Definition: config.h:24
virtual bool parse()
Definition: language_ui.cpp:38
virtual void clear() override
Definition: symbol_table.h:97
static file_typet detect_file_type(const std::string &file_name)
Definition: compile.cpp:145
std::wstring widen(const char *s)
Definition: unicode.cpp:46
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:641
#define CPROVER_PREFIX
std::unique_ptr< languaget > get_language_from_mode(const irep_idt &mode)
Get the language corresponding to the given mode.
Definition: mode.cpp:50
uit get_ui()
Definition: language_ui.h:48
std::list< std::string > tmp_dirs
Definition: compile.h:44
cmdlinet & cmdline
Definition: compile.h:97
std::list< std::string > libraries
Definition: compile.h:43
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
std::list< std::string > defines
Definition: config.h:120
enum compilet::@2 mode
bool is_goto_binary(const std::string &filename)
Read Goto Programs.
std::list< std::string > library_paths
Definition: compile.h:40
language_filest language_files
Definition: language_ui.h:24
language_filet & add_file(const std::string &filename)
Definition: language_file.h:76
exprt value
Initial value of symbol.
Definition: symbol.h:37
std::string get_value(char option) const
Definition: cmdline.cpp:45
virtual bool typecheck()
Definition: language_ui.cpp:92
function_mapt function_map
const cmdlinet & _cmdline
Definition: language_ui.h:54
unsignedbv_typet size_type()
Definition: c_types.cpp:58
bool doit()
reads and source and object files, compiles and links them into goto program objects.
Definition: compile.cpp:70
void convert_function(const irep_idt &identifier, goto_functionst::goto_functiont &result)
std::unique_ptr< languaget > get_language_from_filename(const std::string &filename)
Get the language corresponding to the registered file name extensions.
Definition: mode.cpp:101
bool find_library(const std::string &)
tries to find a library object file that matches the given library name.
Definition: compile.cpp:300
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
bool parse(std::istream &instream, const std::string &path) override
configt config
Definition: config.cpp:23
std::string get_base_name(const std::string &in, bool strip_suffix)
cleans a filename from path and extension
file_typet
Definition: compile.cpp:134
bool write_bin_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:593
std::string get_current_working_directory()
Definition: file_util.cpp:45
bool parse_stdin()
parses a source file (low-level parsing)
Definition: compile.cpp:536
mstreamt & warning() const
Definition: message.h:307
std::unique_ptr< languaget > language
Definition: language_file.h:46
const irep_idt & id() const
Definition: irep.h:259
argst args
Definition: cmdline.h:37
virtual bool isset(char option) const
Definition: cmdline.cpp:27
bool compile()
parses source files and writes object files, or keeps the symbols in the symbol_table depending on th...
Definition: compile.cpp:386
bool write_goto_binary(std::ostream &out, const goto_modelt &goto_model, int version)
Writes a goto program to disc.
bool preprocess(std::istream &instream, const std::string &path, std::ostream &outstream) override
ANSI-C preprocessing.
bool warning_is_fatal
Definition: compile.h:98
bool parse_source(const std::string &)
parses a source file
Definition: compile.cpp:629
#define INITIALIZE_FUNCTION
goto_functionst compiled_functions
Definition: compile.h:27
void delete_directory(const std::string &path)
deletes all files in &#39;path&#39; and then the directory itself
Definition: file_util.cpp:95
The symbol table.
Definition: symbol_table.h:19
mstreamt & error() const
Definition: message.h:302
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
bool ansi_c_entry_point(symbol_tablet &symbol_table, message_handlert &message_handler)
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
bool wrote_object
Definition: compile.h:114
bool echo_file_name
Definition: compile.h:28
const symbolst & symbols
std::string working_directory
Definition: compile.h:29
unsigned function_body_count(const goto_functionst &) const
Definition: compile.cpp:673
bool write_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:581
std::map< irep_idt, symbolt > written_macros
Definition: compile.h:107
Compile and link source and object files.
bool add_files_from_archive(const std::string &file_name, bool thin_archive)
extracts goto binaries from AR archive and add them as input files.
Definition: compile.cpp:224
message_handlert & get_message_handler()
Definition: message.h:153
Goto Programs with Functions.
static irep_idt entry_point()
std::string get_temporary_directory(const std::string &name_template)
Definition: tempdir.cpp:32
void convert_symbols(goto_functionst &dest)
Definition: compile.cpp:693
const char * CBMC_VERSION
Definition: version.cpp:1
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
Base class for all expressions.
Definition: expr.h:42
#define UNREACHABLE
Definition: invariant.h:271
Program Transformation.
std::string object_file_extension
Definition: compile.h:47
std::list< std::string > source_files
Definition: compile.h:41
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
std::list< std::string > object_files
Definition: compile.h:42
mstreamt & debug() const
Definition: message.h:332
void remove_file(const std::string &filename)
Definition: language_file.h:82
bool link()
parses object files and links them
Definition: compile.cpp:343
void add_compiler_specific_defines(class configt &config) const
Definition: compile.cpp:687
std::string output_directory_object
Definition: compile.h:51
bool read_object_and_link(const std::string &file_name, goto_modelt &dest, message_handlert &message_handler)
reads an object file
std::string override_language
Definition: compile.h:30
Write GOTO binaries.
bool add_written_cprover_symbols(const symbol_tablet &symbol_table)
Definition: compile.cpp:735
std::string output_file_object
Definition: compile.h:51
mstreamt & statistics() const
Definition: message.h:322
~compilet()
cleans up temporary files
Definition: compile.cpp:663
unsigned get_message_count(unsigned level) const
Definition: message.h:68