diff --git a/CMakeLists.txt b/CMakeLists.txt index 552454045af..281c306b7ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,6 +150,7 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "$" "$" "$" + "$" "$" "$" "$" @@ -213,6 +214,8 @@ cprover_default_properties( cbmc cbmc-lib cpp + crangler + crangler-lib driver goto-analyzer goto-analyzer-lib diff --git a/regression/CMakeLists.txt b/regression/CMakeLists.txt index 48f02e64c03..91579216566 100644 --- a/regression/CMakeLists.txt +++ b/regression/CMakeLists.txt @@ -83,3 +83,7 @@ if(WITH_MEMORY_ANALYZER) add_subdirectory(memory-analyzer) add_subdirectory(extract_type_header) endif() + +if(NOT WIN32) + add_subdirectory(crangler) +endif() diff --git a/regression/Makefile b/regression/Makefile index 4e6f9bf85ca..7291631dbba 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -3,6 +3,7 @@ # listed with decreasing runtimes (i.e. longest running at the top) DIRS = cbmc \ cbmc-library \ + crangler \ goto-analyzer \ ansi-c \ goto-instrument \ diff --git a/regression/crangler/CMakeLists.txt b/regression/crangler/CMakeLists.txt new file mode 100644 index 00000000000..face63a0da1 --- /dev/null +++ b/regression/crangler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_test_pl_tests( + "$" +) diff --git a/regression/crangler/Makefile b/regression/crangler/Makefile new file mode 100644 index 00000000000..b07a6826202 --- /dev/null +++ b/regression/crangler/Makefile @@ -0,0 +1,27 @@ +default: tests.log + +include ../../src/config.inc +include ../../src/common + +ifeq ($(BUILD_ENV_),MSVC) +test: + +tests.log: + +else +test: + @../test.pl -e -p -c '../../../src/crangler/crangler' + +tests.log: + @../test.pl -e -p -c '../../../src/crangler/crangler' +endif + +clean: + @for dir in *; do \ + $(RM) tests.log; \ + if [ -d "$$dir" ]; then \ + cd "$$dir"; \ + $(RM) *.out *.gb; \ + cd ..; \ + fi \ + done diff --git a/regression/crangler/remove-static-object/remove_static1.c b/regression/crangler/remove-static-object/remove_static1.c new 
file mode 100644 index 00000000000..3a4d13000f1 --- /dev/null +++ b/regression/crangler/remove-static-object/remove_static1.c @@ -0,0 +1,16 @@ +int foo() +{ + return 0; +} + +int bar(); + +static void foobar1() +{ +} + +void static foobar2() +{ +} + +static short x; diff --git a/regression/crangler/remove-static-object/remove_static1.desc b/regression/crangler/remove-static-object/remove_static1.desc new file mode 100644 index 00000000000..6c05608f2b9 --- /dev/null +++ b/regression/crangler/remove-static-object/remove_static1.desc @@ -0,0 +1,8 @@ +CORE +remove_static1.json + +^\s+void foobar1\(\)$ +^\s+short x;$ +^EXIT=0$ +^SIGNAL=0$ +-- diff --git a/regression/crangler/remove-static-object/remove_static1.json b/regression/crangler/remove-static-object/remove_static1.json new file mode 100644 index 00000000000..edf955c7c93 --- /dev/null +++ b/regression/crangler/remove-static-object/remove_static1.json @@ -0,0 +1,20 @@ +{ + "sources": [ + "remove_static1.c" + ], + "functions": [ + { + "foobar1": [ + "remove static" + ] + } + ], + "objects": [ + { + "x": [ + "remove static" + ] + } + ], + "output": "stdout" +} diff --git a/regression/crangler/remove-static-regex/remove_static1.c b/regression/crangler/remove-static-regex/remove_static1.c new file mode 100644 index 00000000000..10df0a9e611 --- /dev/null +++ b/regression/crangler/remove-static-regex/remove_static1.c @@ -0,0 +1,14 @@ +int foo() +{ + return 0; +} + +int bar(); + +static void foobar1() +{ +} + +void static foobar2() +{ +} diff --git a/regression/crangler/remove-static-regex/remove_static1.desc b/regression/crangler/remove-static-regex/remove_static1.desc new file mode 100644 index 00000000000..ca027e08038 --- /dev/null +++ b/regression/crangler/remove-static-regex/remove_static1.desc @@ -0,0 +1,8 @@ +CORE +remove_static1.json + +^\s+void foobar1\(\)$ +^void\s+foobar2\(\)$ +^EXIT=0$ +^SIGNAL=0$ +-- diff --git a/regression/crangler/remove-static-regex/remove_static1.json 
b/regression/crangler/remove-static-regex/remove_static1.json new file mode 100644 index 00000000000..b08b84f4355 --- /dev/null +++ b/regression/crangler/remove-static-regex/remove_static1.json @@ -0,0 +1,13 @@ +{ + "sources": [ + "remove_static1.c" + ], + "functions": [ + { + "foobar[12]": [ + "remove static" + ] + } + ], + "output": "stdout" +} diff --git a/regression/crangler/remove-static/remove_static1.c b/regression/crangler/remove-static/remove_static1.c new file mode 100644 index 00000000000..10df0a9e611 --- /dev/null +++ b/regression/crangler/remove-static/remove_static1.c @@ -0,0 +1,14 @@ +int foo() +{ + return 0; +} + +int bar(); + +static void foobar1() +{ +} + +void static foobar2() +{ +} diff --git a/regression/crangler/remove-static/remove_static1.desc b/regression/crangler/remove-static/remove_static1.desc new file mode 100644 index 00000000000..ca027e08038 --- /dev/null +++ b/regression/crangler/remove-static/remove_static1.desc @@ -0,0 +1,8 @@ +CORE +remove_static1.json + +^\s+void foobar1\(\)$ +^void\s+foobar2\(\)$ +^EXIT=0$ +^SIGNAL=0$ +-- diff --git a/regression/crangler/remove-static/remove_static1.json b/regression/crangler/remove-static/remove_static1.json new file mode 100644 index 00000000000..58950d8cb06 --- /dev/null +++ b/regression/crangler/remove-static/remove_static1.json @@ -0,0 +1,18 @@ +{ + "sources": [ + "remove_static1.c" + ], + "functions": [ + { + "foobar1": [ + "remove static" + ] + }, + { + "foobar2": [ + "remove static" + ] + } + ], + "output": "stdout" +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0354bf9d757..1578ca05d4f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,6 +106,7 @@ add_subdirectory(statement-list) add_subdirectory(util) add_subdirectory(cbmc) +add_subdirectory(crangler) add_subdirectory(goto-cc) add_subdirectory(goto-instrument) add_subdirectory(goto-analyzer) diff --git a/src/Makefile b/src/Makefile index bec62a6cd5c..86d03ffbe90 100644 --- a/src/Makefile +++ b/src/Makefile 
@@ -4,6 +4,7 @@ DIRS = analyses \ big-int \ cbmc \ cpp \ + crangler \ goto-analyzer \ goto-cc \ goto-checker \ @@ -27,6 +28,7 @@ DIRS = analyses \ # Empty last line all: cbmc.dir \ + crangler.dir \ goto-analyzer.dir \ goto-cc.dir \ goto-diff.dir \ @@ -59,6 +61,8 @@ $(patsubst %, %.dir, $(filter-out big-int util, $(DIRS))): util.dir cpp.dir: ansi-c.dir linking.dir +crangler.dir: util.dir json.dir + languages: util.dir langapi.dir \ cpp.dir ansi-c.dir xmllang.dir assembler.dir \ jsil.dir json.dir json-symtab-language.dir statement-list.dir diff --git a/src/crangler/CMakeLists.txt b/src/crangler/CMakeLists.txt new file mode 100644 index 00000000000..0bf356902a8 --- /dev/null +++ b/src/crangler/CMakeLists.txt @@ -0,0 +1,26 @@ +generic_flex(c) + +# Library +file(GLOB_RECURSE sources "*.cpp" "*.h") +list(REMOVE_ITEM sources + ${CMAKE_CURRENT_SOURCE_DIR}/crangler_main.cpp +) + +add_library(crangler-lib + ${sources} + ${FLEX_scanner_OUTPUTS} +) + +generic_includes(crangler-lib) + +target_link_libraries(crangler-lib + big-int + util + json +) + +# Executable +add_executable(crangler crangler_main.cpp) +target_link_libraries(crangler crangler-lib) + +install(TARGETS crangler DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/crangler/Makefile b/src/crangler/Makefile new file mode 100644 index 00000000000..8583aa9039b --- /dev/null +++ b/src/crangler/Makefile @@ -0,0 +1,36 @@ +SRC = c_defines.cpp \ + c_lex.yy.cpp \ + c_wrangler.cpp \ + crangler_main.cpp \ + crangler_parse_options.cpp \ + cscanner.cpp \ + ctokenit.cpp \ + mini_c_parser.cpp \ + # Empty last line + +OBJ += ../big-int/big-int$(LIBEXT) \ + ../json/json$(LIBEXT) \ + ../util/util$(LIBEXT) + +INCLUDES= -I .. 
+ +include ../config.inc +include ../../$(CPROVER_DIR)/src/common + +CLEANFILES = crangler$(LIBEXT) + +all: crangler$(EXEEXT) + +############################################################################### + +c_lex.yy.cpp: scanner.l + $(LEX) -Pyyc -o$@ scanner.l + +############################################################################### + +generated_files: c_lex.yy.cpp + +############################################################################### + +crangler$(EXEEXT): $(OBJ) + $(LINKBIN) diff --git a/src/crangler/c_defines.cpp b/src/crangler/c_defines.cpp new file mode 100644 index 00000000000..ad77583b9ea --- /dev/null +++ b/src/crangler/c_defines.cpp @@ -0,0 +1,67 @@ +/*******************************************************************\ + +Module: C Defines + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// c_defines + +#include "c_defines.h" + +#include "cscanner.h" + +#include +#include + +#include + +void c_definest::parse(const std::string &src) +{ + const auto lines = split_string(src, '\n'); + for(const auto &line : lines) + { + // #define __x86_64__ 1 + // #define getc_unlocked(fp) __sgetc(fp) + if(!has_prefix(line, "#define ")) + continue; + + auto space_pos = line.find(' ', 8); + if(space_pos == std::string::npos) + continue; + + auto id = line.substr(8, space_pos - 8); + auto value = line.substr(space_pos + 1, std::string::npos); + map[id].value = value; + } +} + +std::string c_definest::operator()(const std::string &src) const +{ + // tokenize + std::istringstream in(src); + cscannert cscanner(in); + const auto tokens = cscanner.get_tokens(); + + // output + std::ostringstream out; + for(auto &t : tokens) + { + if(is_identifier(t)) + { + auto m_it = map.find(t.text); + if(m_it != map.end()) + { + out << m_it->second.value; + } + else + out << t.text; + } + else + out << t.text; + } + + return out.str(); +} diff --git a/src/crangler/c_defines.h 
b/src/crangler/c_defines.h new file mode 100644 index 00000000000..3dc34ba435a --- /dev/null +++ b/src/crangler/c_defines.h @@ -0,0 +1,39 @@ +/*******************************************************************\ + +Module: C Defines + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// c_defines + +#ifndef CPROVER_CRANGLER_C_DEFINES_H +#define CPROVER_CRANGLER_C_DEFINES_H + +#include + +#include +#include +#include + +/// This class maintains a representation of one assignment to the +/// preprocessor macros in a C program. +class c_definest +{ +public: + struct definet + { + optionalt> parameters; + std::string value; + }; + + using mapt = std::unordered_map; + mapt map; + + void parse(const std::string &); + std::string operator()(const std::string &) const; +}; + +#endif // CPROVER_CRANGLER_C_DEFINES_H diff --git a/src/crangler/c_wrangler.cpp b/src/crangler/c_wrangler.cpp new file mode 100644 index 00000000000..369bf736f8e --- /dev/null +++ b/src/crangler/c_wrangler.cpp @@ -0,0 +1,580 @@ +/*******************************************************************\ + +Module: C Wrangler + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// C Wrangler + +#include "c_wrangler.h" + +#include "c_defines.h" +#include "ctokenit.h" +#include "mini_c_parser.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +struct c_wranglert +{ + // sources and preprocessing + std::vector source_files; + std::vector includes; + std::vector defines; + + // transformations + struct contract_clauset + { + std::string clause; + std::string content; + contract_clauset(std::string _clause, std::string _content) + : clause(std::move(_clause)), content(std::move(_content)) + { + } + }; + + struct loop_invariantt + { + 
std::string loop_type; + std::string identifier; + std::string content; + loop_invariantt( + std::string _loop_type, + std::string _identifier, + std::string _content) + : loop_type(std::move(_loop_type)), + identifier(std::move(_identifier)), + content(std::move(_content)) + { + } + }; + + struct assertiont + { + std::string identifier; + std::string content; + assertiont(std::string _identifier, std::string _content) + : identifier(std::move(_identifier)), content(std::move(_content)) + { + } + }; + + struct functiont + { + // should be variant to preserve ordering + std::vector contract; + std::vector loop_invariants; + std::vector assertions; + optionalt stub; + bool remove_static = false; + }; + + using functionst = std::list>; + functionst functions; + + struct objectt + { + bool remove_static = false; + }; + + using objectst = std::list>; + objectst objects; + + // output + std::string output; + + void configure_sources(const jsont &); + void configure_functions(const jsont &); + void configure_objects(const jsont &); + void configure_output(const jsont &); +}; + +void c_wranglert::configure_sources(const jsont &config) +{ + auto sources = config["sources"]; + + if(!sources.is_null()) + { + if(!sources.is_array()) + throw deserialization_exceptiont("sources entry must be sequence"); + + for(const auto &source : to_json_array(sources)) + { + if(!source.is_string()) + throw deserialization_exceptiont("source must be string"); + + this->source_files.push_back(source.value); + } + } + + auto includes = config["includes"]; + + if(!includes.is_null()) + { + if(!includes.is_array()) + throw deserialization_exceptiont("includes entry must be sequence"); + + for(const auto &include : to_json_array(includes)) + { + if(!include.is_string()) + throw deserialization_exceptiont("include must be string"); + + this->includes.push_back(include.value); + } + } + + auto defines = config["defines"]; + + if(!defines.is_null()) + { + if(!defines.is_array()) + throw 
deserialization_exceptiont("defines entry must be sequence"); + + for(const auto &define : to_json_array(defines)) + { + if(!define.is_string()) + throw deserialization_exceptiont("define must be string"); + + this->defines.push_back(define.value); + } + } +} + +void c_wranglert::configure_functions(const jsont &config) +{ + auto functions = config["functions"]; + + if(functions.is_null()) + return; + + if(!functions.is_array()) + throw deserialization_exceptiont("functions entry must be sequence"); + + for(const auto &function : to_json_array(functions)) + { + if(!function.is_object()) + throw deserialization_exceptiont("function entry must be object"); + + for(const auto &function_entry : to_json_object(function)) + { + const auto function_name = function_entry.first; + const auto &items = function_entry.second; + + if(!items.is_array()) + throw deserialization_exceptiont("function entry must be sequence"); + + this->functions.emplace_back(function_name, functiont{}); + functiont &function_config = this->functions.back().second; + + for(const auto &function_item : to_json_array(items)) + { + // These need to start with "ensures", "requires", "assigns", + // "invariant", "assert", "stub", "remove" + if(!function_item.is_string()) + throw deserialization_exceptiont("function entry must be string"); + + auto item_string = function_item.value; + auto split = split_string(item_string, ' '); + if(split.empty()) + continue; + + if( + split[0] == "ensures" || split[0] == "requires" || + split[0] == "assigns") + { + std::ostringstream rest; + join_strings(rest, split.begin() + 1, split.end(), ' '); + + function_config.contract.emplace_back(split[0], rest.str()); + } + else if(split[0] == "assert" && split.size() >= 3) + { + std::ostringstream rest; + join_strings(rest, split.begin() + 2, split.end(), ' '); + + function_config.assertions.emplace_back(split[1], rest.str()); + } + else if( + (split[0] == "for" && split.size() >= 3 && split[2] == "invariant") || + (split[0] 
== "while" && split.size() >= 3 && split[2] == "invariant")) + { + std::ostringstream rest; + join_strings(rest, split.begin() + 3, split.end(), ' '); + + function_config.loop_invariants.emplace_back( + split[0], split[1], rest.str()); + } + else if(split[0] == "stub") + { + std::ostringstream rest; + join_strings(rest, split.begin() + 1, split.end(), ' '); + + function_config.stub = rest.str(); + } + else if(split[0] == "remove") + { + if(split.size() == 1) + throw deserialization_exceptiont("unexpected remove entry"); + + if(split[1] == "static") + function_config.remove_static = true; + else + throw deserialization_exceptiont( + "unexpected remove entry " + split[1]); + } + else + throw deserialization_exceptiont( + "unexpected function entry " + split[0]); + } + } + } +} + +void c_wranglert::configure_objects(const jsont &config) +{ + auto objects = config["objects"]; + + if(objects.is_null()) + return; + + if(!objects.is_array()) + throw deserialization_exceptiont("objects entry must be sequence"); + + for(const auto &object : to_json_array(objects)) + { + if(!object.is_object()) + throw deserialization_exceptiont("object entry must be object"); + + for(const auto &object_entry : to_json_object(object)) + { + const auto &object_name = object_entry.first; + const auto &items = object_entry.second; + + if(!items.is_array()) + throw deserialization_exceptiont("object entry must be sequence"); + + this->objects.emplace_back(object_name, objectt{}); + objectt &object_config = this->objects.back().second; + + for(const auto &object_item : to_json_array(items)) + { + // Needs to start with "remove" + if(!object_item.is_string()) + throw deserialization_exceptiont("object entry must be string"); + + auto item_string = object_item.value; + auto split = split_string(item_string, ' '); + if(split.empty()) + continue; + + if(split[0] == "remove") + { + if(split.size() == 1) + throw deserialization_exceptiont("unexpected remove entry"); + + if(split[1] == "static") + 
object_config.remove_static = true; + else + throw deserialization_exceptiont( + "unexpected remove entry " + split[1]); + } + else + throw deserialization_exceptiont( + "unexpected object entry " + split[0]); + } + } + } +} + +void c_wranglert::configure_output(const jsont &config) +{ + auto output = config["output"]; + + if(output.is_null()) + return; + + if(!output.is_string()) + throw deserialization_exceptiont("output entry must be string"); + + this->output = output.value; +} + +static std::string +preprocess(const std::string &source_file, const c_wranglert &c_wrangler) +{ + std::vector argv = {"cc", "-E", source_file}; + + for(const auto &include : c_wrangler.includes) + { + argv.push_back("-I"); + argv.push_back(include); + } + + for(const auto &define : c_wrangler.defines) + argv.push_back(std::string("-D") + define); + + std::ostringstream result; + + auto run_result = run("cc", argv, "", result, ""); + if(run_result != 0) + throw system_exceptiont("preprocessing " + source_file + " has failed"); + + return result.str(); +} + +static c_definest +get_defines(const std::string &source_file, const c_wranglert &config) +{ + std::vector argv = {"cc", "-E", "-dM", source_file}; + + for(const auto &include : config.includes) + { + argv.push_back("-I"); + argv.push_back(include); + } + + std::ostringstream result; + + auto run_result = run("cc", argv, "", result, ""); + if(run_result != 0) + throw system_exceptiont("preprocessing " + source_file + " has failed"); + + c_definest defines; + defines.parse(result.str()); + return defines; +} + +static void mangle_function( + const c_declarationt &declaration, + const c_definest &defines, + const c_wranglert::functiont &function_config, + std::ostream &out) +{ + if(function_config.stub.has_value()) + { + // replace by stub + out << function_config.stub.value(); + } + else + { + if(function_config.remove_static) + { + for(auto &t : declaration.pre_declarator) + { + if(t.text == "static") + { + // we replace by white 
space + out << std::string(6, ' '); + } + else + out << t.text; + } + } + else + { + for(auto &t : declaration.pre_declarator) + out << t.text; + } + + for(auto &t : declaration.declarator) + out << t.text; + for(auto &t : declaration.post_declarator) + out << t.text; + + for(const auto &entry : function_config.contract) + out << ' ' << CPROVER_PREFIX << entry.clause << '(' + << defines(entry.content) << ')'; + + std::map loop_invariants; + + for(const auto &entry : function_config.loop_invariants) + loop_invariants[entry.loop_type + entry.identifier] = entry.content; + + if(loop_invariants.empty()) + { + for(auto &t : declaration.initializer) + out << t.text; + } + else + { + std::size_t for_count = 0, while_count = 0; + ctokenitt t(declaration.initializer); + + while(t) + { + const auto &token = *(t++); + out << token.text; + + if(token == "while") + { + while_count++; + const auto &invariant = + loop_invariants["while" + std::to_string(while_count)]; + + if(!invariant.empty()) + { + auto t_end = match_bracket(t, '(', ')'); + for(; t != t_end; t++) + out << t->text; + out << ' ' << CPROVER_PREFIX << "loop_invariant(" + << defines(invariant) << ')'; + } + } + else if(token == "for") + { + for_count++; + const auto &invariant = + loop_invariants["for" + std::to_string(for_count)]; + + if(!invariant.empty()) + { + auto t_end = match_bracket(t, '(', ')'); + for(; t != t_end; t++) + out << t->text; + out << ' ' << CPROVER_PREFIX << "invariant(" << defines(invariant) + << ')'; + } + } + } + } + } +} + +static void mangle_object( + const c_declarationt &declaration, + const c_definest &defines, + const c_wranglert::objectt &object_config, + std::ostream &out) +{ + if(object_config.remove_static) + { + for(auto &t : declaration.pre_declarator) + { + if(t.text == "static") + { + // we replace by white space + out << std::string(6, ' '); + } + else + out << t.text; + } + } + else + { + for(auto &t : declaration.pre_declarator) + out << t.text; + } + + for(auto &t : 
declaration.declarator) + out << t.text; + for(auto &t : declaration.post_declarator) + out << t.text; + for(auto &t : declaration.initializer) + out << t.text; +} + +static void mangle( + const c_declarationt &declaration, + const c_definest &defines, + const c_wranglert &config, + std::ostream &out) +{ + auto name_opt = declaration.declared_identifier(); + if( + declaration.is_function() && name_opt.has_value() && declaration.has_body()) + { + for(const auto &entry : config.functions) + { + if(std::regex_match(name_opt->text, entry.first)) + { + // we are to modify this function + mangle_function(declaration, defines, entry.second, out); + + return; + } + } + } + else if(!declaration.is_function() && name_opt.has_value()) + { + for(const auto &entry : config.objects) + { + if(std::regex_match(name_opt->text, entry.first)) + { + // we are to modify this function + mangle_object(declaration, defines, entry.second, out); + + return; + } + } + } + + // output + out << declaration; +} + +static std::string mangle( + const std::string &in, + const c_definest &defines, + const c_wranglert &config) +{ + std::ostringstream out; + std::istringstream in_str(in); + + auto parsed = parse_c(in_str); + + for(const auto &declaration : parsed) + mangle(declaration, defines, config, out); + + return out.str(); +} + +void c_wrangler(const jsont &config) +{ + c_wranglert c_wrangler; + + c_wrangler.configure_sources(config); + c_wrangler.configure_functions(config); + c_wrangler.configure_objects(config); + c_wrangler.configure_output(config); + + for(auto &source_file : c_wrangler.source_files) + { + // first preprocess + auto preprocessed = preprocess(source_file, c_wrangler); + + // get the defines + auto defines = get_defines(source_file, c_wrangler); + + // now mangle + auto mangled = mangle(preprocessed, defines, c_wrangler); + + // now output + if(c_wrangler.output == "stdout" || c_wrangler.output.empty()) + { + std::cout << mangled; + } + else + { + std::ofstream 
out(c_wrangler.output); + out << mangled; + } + } +} diff --git a/src/crangler/c_wrangler.h b/src/crangler/c_wrangler.h new file mode 100644 index 00000000000..a5c6579d0f8 --- /dev/null +++ b/src/crangler/c_wrangler.h @@ -0,0 +1,19 @@ +/*******************************************************************\ + +Module: C Wrangler + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// C Wrangler + +#ifndef CPROVER_CRANGLER_C_WRANGLER_H +#define CPROVER_CRANGLER_C_WRANGLER_H + +class jsont; + +void c_wrangler(const jsont &); + +#endif // CPROVER_CRANGLER_C_WRANGLER_H diff --git a/src/crangler/crangler_main.cpp b/src/crangler/crangler_main.cpp new file mode 100644 index 00000000000..b85dd283a37 --- /dev/null +++ b/src/crangler/crangler_main.cpp @@ -0,0 +1,30 @@ +/*******************************************************************\ + +Module: CRANGLER Main Module + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// CRANGLER Main Module + +#include "crangler_parse_options.h" + +#ifdef _MSC_VER +# include +#endif + +#ifdef _MSC_VER +int wmain(int argc, const wchar_t **argv_wide) +{ + auto vec = narrow_argv(argc, argv_wide); + auto narrow = to_c_str_array(std::begin(vec), std::end(vec)); + auto argv = narrow.data(); +#else +int main(int argc, const char **argv) +{ +#endif + crangler_parse_optionst parse_options(argc, argv); + return parse_options.main(); +} diff --git a/src/crangler/crangler_parse_options.cpp b/src/crangler/crangler_parse_options.cpp new file mode 100644 index 00000000000..44ae4522a00 --- /dev/null +++ b/src/crangler/crangler_parse_options.cpp @@ -0,0 +1,60 @@ +/*******************************************************************\ + +Module: CRANGLER Command Line Option Processing + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// 
\file +/// CRANGLER Command Line Option Processing + +#include "crangler_parse_options.h" + +#include +#include +#include + +#include + +#include + +#include "c_wrangler.h" + +int crangler_parse_optionst::doit() +{ + if(cmdline.args.empty()) + { + std::cerr << "please give a configuration file\n"; + return CPROVER_EXIT_INCORRECT_TASK; + } + + for(const auto &file_name : cmdline.args) + process_crangler_json(file_name); + + return 0; +} + +void crangler_parse_optionst::process_crangler_json( + const std::string &file_name) +{ + console_message_handlert message_handler; + jsont configuration; + + if(parse_json(file_name, message_handler, configuration)) + return; + + c_wrangler(configuration); +} + +void crangler_parse_optionst::help() +{ + std::cout << '\n' + << banner_string("CRANGLER", CBMC_VERSION) << '\n' + << "\n" + "Usage: Purpose:\n" + "\n" + " crangler [-?] [-h] [--help] show help\n" + " crangler file.json ... configuration file names\n" + "\n"; +} diff --git a/src/crangler/crangler_parse_options.h b/src/crangler/crangler_parse_options.h new file mode 100644 index 00000000000..fa813436e25 --- /dev/null +++ b/src/crangler/crangler_parse_options.h @@ -0,0 +1,32 @@ +/*******************************************************************\ + +Module: CRANGLER Command Line Option Processing + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// CRANGLER Command Line Option Processing + +#ifndef CPROVER_CRANGLER_CRANGLER_PARSE_OPTIONS_H +#define CPROVER_CRANGLER_CRANGLER_PARSE_OPTIONS_H + +#include + +class crangler_parse_optionst : public parse_options_baset +{ +public: + int doit() override; + void help() override; + + crangler_parse_optionst(int argc, const char **argv) + : parse_options_baset("", argc, argv, "CRANGLER") + { + } + +protected: + void process_crangler_json(const std::string &file_name); +}; + +#endif // CPROVER_CRANGLER_CRANGLER_PARSE_OPTIONS_H diff --git 
a/src/crangler/cscanner.cpp b/src/crangler/cscanner.cpp new file mode 100644 index 00000000000..0458721bf09 --- /dev/null +++ b/src/crangler/cscanner.cpp @@ -0,0 +1,51 @@ +/*******************************************************************\ + +Module: C Scanner + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +#include "cscanner.h" + +cscannert *cscanner_ptr; + +int yyclex(); +int yyclex_destroy(); +void initialize_yyc_scanner(); + +cscannert::cscannert(std::istream &_in) : in(_in) +{ + initialize_yyc_scanner(); +} + +cscannert::~cscannert() +{ + yyclex_destroy(); +} + +ctokent cscannert::operator()() +{ + cscanner_ptr = this; + + if(yyclex() == 0) // EOF + { + token.kind = ctokent::END_OF_FILE; + token.text.clear(); + token.line_number = line_number; + } + + return std::move(token); +} + +std::vector cscannert::get_tokens() +{ + std::vector result; + + do + { + result.push_back(this->operator()()); + } while(!is_eof(result.back())); + + return result; +} diff --git a/src/crangler/cscanner.h b/src/crangler/cscanner.h new file mode 100644 index 00000000000..c14798fa68d --- /dev/null +++ b/src/crangler/cscanner.h @@ -0,0 +1,48 @@ +/*******************************************************************\ + +Module: C Scanner + +Author: Daniel Kroening, dkr@amazon.com + +\*******************************************************************/ + +/// \file +/// cscanner + +#ifndef CPROVER_CRANGLER_CSCANNER_H +#define CPROVER_CRANGLER_CSCANNER_H + +#include +#include + +#include "ctoken.h" + +class cscannert +{ +public: + explicit cscannert(std::istream &); + ~cscannert(); + + ctokent operator()(); + + std::istream ∈ + std::size_t line_number = 1; + + bool return_WS_and_comments = false; + + void set_token(std::string text, ctokent::kindt kind) + { + token.line_number = line_number; + token.text = std::move(text); + token.kind = kind; + } + + std::vector get_tokens(); + +protected: + ctokent token; +}; + 
/// A token of C source text: its kind, its text as it appears in the
/// source, and the line number recorded for it.
class ctokent
{
public:
  using kindt = enum {
    END_OF_FILE,
    INT_LIT,
    CHAR_LIT,
    FLOAT_LIT,
    STRING_LIT,
    C_COMMENT,
    CPP_COMMENT,
    IDENTIFIER,
    OPERATOR,
    WS,
    SEPARATOR,
    PREPROCESSOR_DIRECTIVE,
    UNKNOWN
  };

  // A default-constructed token has a well-defined kind, so reading it
  // before it has been set is not undefined behaviour.
  kindt kind = UNKNOWN;

  // could be string_view, after C++17
  std::string text;

  std::size_t line_number = 0;

  ctokent() = default;

  ctokent(kindt _kind, std::string _text) : kind(_kind), text(std::move(_text))
  {
  }

  /// Write the name of the kind, a space, and the text to the stream.
  void output(std::ostream &) const;

  /// \return true iff the token text equals the given string
  bool operator==(const char *other_text) const
  {
    return text == other_text;
  }

  /// \return true iff the token text is exactly the given character
  bool operator==(char some_char) const
  {
    // compare in place instead of building a temporary string
    return text.size() == 1 && text[0] == some_char;
  }

  /// \return true iff the token text is anything but the given character
  bool operator!=(char some_char) const
  {
    return !(*this == some_char);
  }
};
+{
+  PRECONDITION(!eof());
+  return tokens[pos];
+}
+
+/// Postfix increment: advances by one token and returns a copy of the
+/// iterator as it was before the increment. Must not be called at eof.
+ctokenitt ctokenitt::operator++(int) // NOLINT(*)
+{
+  PRECONDITION(!eof());
+  auto pre_increment = *this; // copy
+  pos++;
+  return pre_increment;
+}
+
+/// Skips a bracketed token sequence.
+/// Leading whitespace, comment and preprocessor-directive tokens are
+/// skipped first. If the next token is not \p open (or the input is
+/// exhausted), the iterator at that position is returned. Otherwise the
+/// iterator just past the \p close token that balances the first \p open
+/// is returned.
+/// \param t: position to start from
+/// \param open: the opening bracket character
+/// \param close: the closing bracket character
+/// \return position after the skipped tokens
+/// \throws invalid_source_file_exceptiont if the tokens run out before the
+///   bracket is closed
+ctokenitt match_bracket(ctokenitt t, char open, char close)
+{
+  if(!t)
+    return t;
+
+  // skip whitespace, if any
+  while(t && (is_ws(*t) || is_comment(*t) || is_preprocessor_directive(*t)))
+    t++;
+
+  // the skip above may have run into eof; dereferencing would then violate
+  // the precondition of operator*
+  if(!t || *t != open)
+    return t;
+
+  std::size_t bracket_count = 0;
+  while(true)
+  {
+    if(!t)
+      throw invalid_source_file_exceptiont("expected " + std::string(1, close));
+
+    const auto &token = *(t++);
+
+    if(token == open)
+      bracket_count++;
+    else if(token == close)
+    {
+      bracket_count--;
+      if(bracket_count == 0)
+        return t; // done
+    }
+  }
+}
+
+/// Same as match_bracket(ctokenitt, char, char), and additionally appends
+/// all tokens between \p t and the returned position to \p dest.
+ctokenitt
+match_bracket(ctokenitt t, char open, char close, ctokenitt::tokenst &dest)
+{
+  auto end = match_bracket(t, open, close);
+  // append; copying to dest.end() would write past the end of the vector
+  dest.insert(dest.end(), t.cit(), end.cit());
+  return end;
+}
diff --git a/src/crangler/ctokenit.h b/src/crangler/ctokenit.h
new file mode 100644
index 00000000000..9eaab0d72f0
--- /dev/null
+++ b/src/crangler/ctokenit.h
@@ -0,0 +1,70 @@
+/*******************************************************************\
+
+Module: C Token Iterator
+
+Author: Daniel Kroening, dkr@amazon.com
+
+\*******************************************************************/
+
+/// \file
+/// ctokenit
+
+#ifndef CPROVER_CRANGLER_CTOKENIT_H
+#define CPROVER_CRANGLER_CTOKENIT_H
+
+#include "cscanner.h"
+
+/// Forward cursor over a vector of tokens. Only a reference to the vector
+/// is stored, so the vector must outlive the cursor.
+class ctokenitt
+{
+public:
+  using tokenst = std::vector<ctokent>;
+
+  explicit ctokenitt(const tokenst &_tokens) : tokens(_tokens)
+  {
+  }
+
+  /// true iff there is a token at the current position
+  explicit operator bool() const
+  {
+    return !eof();
+  }
+
+  /// true iff the position is at or beyond the end of the vector
+  bool eof() const
+  {
+    return pos >= tokens.size();
+  }
+
+  /// advance by \p offset tokens; no bounds check is performed here
+  ctokenitt &operator+=(std::size_t offset)
+  {
+    pos += offset;
+    return *this;
+  }
+
+  ctokenitt operator++(int); // postfix ++
+
+  const ctokent &operator*() const;
+
+  const ctokent *operator->() const
+  {
+    return &**this;
+  }
+
+  /// the current position as an iterator into the underlying vector
+  tokenst::const_iterator cit()
const
+  {
+    return tokens.begin() + pos;
+  }
+
+  /// compares positions only; the underlying vectors are not compared
+  bool operator!=(const ctokenitt &other) const
+  {
+    return pos != other.pos;
+  }
+
+protected:
+  const tokenst &tokens;
+  std::size_t pos = 0;
+};
+
+ctokenitt match_bracket(ctokenitt, char open, char close);
+ctokenitt
+match_bracket(ctokenitt, char open, char close, ctokenitt::tokenst &dest);
+
+#endif // CPROVER_CRANGLER_CTOKENIT_H
diff --git a/src/crangler/mini_c_parser.cpp b/src/crangler/mini_c_parser.cpp
new file mode 100644
index 00000000000..b7f4b5ac76c
--- /dev/null
+++ b/src/crangler/mini_c_parser.cpp
@@ -0,0 +1,372 @@
+/*******************************************************************\
+
+Module: Mini C Parser
+
+Author: Daniel Kroening, dkr@amazon.com
+
+\*******************************************************************/
+
+/// \file
+/// Mini C Parser
+
+#include "mini_c_parser.h"
+
+// NOTE(review): the header names of the next two includes are missing in
+// this copy of the patch -- restore them from the upstream file.
+#include
+#include
+
+#include "cscanner.h"
+
+/// Splits a token stream into declarations, each consisting of
+/// pre-declarator, declarator, post-declarator and initializer tokens.
+class mini_c_parsert
+{
+public:
+  mini_c_parsert()
+  {
+  }
+
+  c_translation_unitt parse(std::istream &);
+
+protected:
+  // index of the next token to be consumed; parse() resets it to zero
+  std::size_t token_index = 0;
+  using tokenst = std::vector<ctokent>;
+  tokenst tokens;
+
+  /// true iff the next token is the eof token; requires a token to be
+  /// available, see peek()
+  bool eof() const
+  {
+    return is_eof(peek());
+  }
+
+  c_declarationt parse_declaration();
+  tokenst parse_pre_declarator();
+  tokenst parse_declarator();
+  tokenst parse_post_declarator();
+  tokenst parse_initializer();
+
+  /// the next token, without consuming it
+  const ctokent &peek() const
+  {
+    PRECONDITION(token_index < tokens.size());
+    return tokens[token_index];
+  }
+
+  /// the token \p how_many positions after the next one, without consuming
+  const ctokent &peek(std::size_t how_many) const
+  {
+    PRECONDITION(token_index + how_many < tokens.size());
+    return tokens[token_index + how_many];
+  }
+
+  /// returns the next token and moves past it; the eof token is never
+  /// consumed
+  const ctokent &consume_token()
+  {
+    PRECONDITION(token_index < tokens.size());
+    PRECONDITION(!is_eof(tokens[token_index]));
+    return tokens[token_index++];
+  }
+
+  static bool is_storage_class(const ctokent &token)
+  {
+    return token == "auto" || token == "extern" || token == "static" ||
+           token == "register" || token == "_Thread_local";
+  }
+
+  static bool is_type_qualifier(const ctokent
&token)
+  {
+    return token == "const" || token == "volatile" || token == "restrict" ||
+           token == "_Atomic";
+  }
+
+  void skip_ws(tokenst &);
+  void parse_brackets(char open, char close, tokenst &dest);
+};
+
+/// Writes the text of all tokens of the declaration, in the order
+/// pre-declarator, declarator, post-declarator, initializer.
+std::ostream &operator<<(std::ostream &out, const c_declarationt &declaration)
+{
+  for(const auto &t : declaration.pre_declarator)
+    out << t.text;
+
+  for(const auto &t : declaration.declarator)
+    out << t.text;
+
+  for(const auto &t : declaration.post_declarator)
+    out << t.text;
+
+  for(const auto &t : declaration.initializer)
+    out << t.text;
+
+  return out;
+}
+
+/// Writes a "DECLARATOR: ..." line with the declarator text; writes nothing
+/// when the declarator is empty.
+void c_declarationt::print(std::ostream &out) const
+{
+  if(!declarator.empty())
+  {
+    out << "DECLARATOR: ";
+    for(const auto &t : declarator)
+      out << t.text;
+    out << '\n';
+  }
+}
+
+/// \return true iff the first post-declarator token that is not whitespace,
+///   a comment or a preprocessor directive is '('
+bool c_declarationt::is_function() const
+{
+  // parse_post_declarator also collects the whitespace that follows the
+  // declarator, so "int f (void)" must not be judged by front() alone
+  for(const auto &t : post_declarator)
+  {
+    if(is_ws(t) || is_comment(t) || is_preprocessor_directive(t))
+      continue;
+
+    return t == '(';
+  }
+
+  return false;
+}
+
+/// \return true iff the initializer starts with '{'
+bool c_declarationt::has_body() const
+{
+  return !initializer.empty() && initializer.front() == '{';
+}
+
+/// \return the first identifier token of the declarator, or nothing when
+///   the declarator contains no identifier
+optionalt<ctokent> c_declarationt::declared_identifier() const
+{
+  for(const auto &t : declarator)
+    if(is_identifier(t))
+      return t;
+  return {};
+}
+
+/// Moves whitespace, comment and preprocessor-directive tokens from the
+/// input to \p dest until some other token is next.
+void mini_c_parsert::skip_ws(tokenst &dest)
+{
+  if(eof())
+    return;
+
+  while(is_ws(peek()) || is_comment(peek()) ||
+        is_preprocessor_directive(peek()))
+  {
+    dest.push_back(consume_token());
+  }
+}
+
+/// If the next token is \p open, moves all tokens up to and including the
+/// balancing \p close to \p dest; does nothing otherwise.
+/// \throws invalid_source_file_exceptiont if eof is reached before the
+///   bracket is closed
+void mini_c_parsert::parse_brackets(char open, char close, tokenst &dest)
+{
+  if(eof() || peek() != open)
+    return;
+
+  std::size_t bracket_count = 0;
+  while(true)
+  {
+    if(eof())
+      throw invalid_source_file_exceptiont("expected " + std::string(1, close));
+
+    auto &token = consume_token();
+    dest.push_back(token);
+    if(token == open)
+      bracket_count++;
+    else if(token == close)
+    {
+      bracket_count--;
+      if(bracket_count == 0)
+        break; // done
+    }
+  }
+}
+
+/// Collects the tokens that precede the declarator. Stops, without
+/// consuming, at eof, at ';', at '(' or at the identifier that is taken to
+/// be the declarator.
+/// \throws invalid_source_file_exceptiont on any other token
+mini_c_parsert::tokenst mini_c_parsert::parse_pre_declarator()
+{
+  // type qualifier
+  // storage class
+  // type
+  // '*'
+  tokenst result;
+
+  while(true)
+  {
+    skip_ws(result);
+
+    if(eof())
+      return result;
+
+    auto &token = peek();
+
+    if(
+      is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
+      token == "int" || token == "signed" || token == "unsigned" ||
+      token == "char" || token == "short" || token == "long" ||
+      token == "float" || token == "double" || token == "inline" ||
+      token == "typedef")
+    {
+      result.push_back(consume_token());
+    }
+    else if(token == "enum" || token == "struct" || token == "union")
+    {
+      result.push_back(consume_token());
+
+      skip_ws(result);
+
+      // may be followed by a tag
+      if(!eof() && is_identifier(peek()))
+        result.push_back(consume_token());
+
+      skip_ws(result);
+
+      // may be followed by a body {...}
+      parse_brackets('{', '}', result);
+    }
+    else if(token == "__attribute__")
+    {
+      result.push_back(consume_token());
+      skip_ws(result);
+      // followed by (( ... ))
+      parse_brackets('(', ')', result);
+    }
+    else if(is_identifier(token))
+    {
+      // Might be typedef or the declarator.
+      // We look ahead for the next non-WS token to tell the difference.
+
+      // the lookahead stops at the eof token at the latest, as that token
+      // is never consumed and does not satisfy the condition below
+      std::size_t index = 1;
+      while(true)
+      {
+        const auto &next_token = peek(index);
+        if(
+          is_ws(next_token) || is_preprocessor_directive(next_token) ||
+          is_comment(next_token))
+          index++;
+        else
+          break;
+      }
+
+      auto &next_token = peek(index);
+      if(!is_identifier(next_token) && next_token != '*')
+      {
+        // 'token' is the declarator
+        return result;
+      }
+      else
+        result.push_back(consume_token()); // it's a type
+    }
+    else if(token == ';')
+      return result;
+    else if(token == '(') // function type, part of declarator
+      return result;
+    else
+      throw invalid_source_file_exceptiont(
+        "expected a declaration but got '" + token.text + "'");
+  }
+}
+
+/// Consumes the declarator: either a single identifier or a parenthesized
+/// token sequence. Returns no tokens at eof or when ';' is next.
+/// \throws invalid_source_file_exceptiont on any other token
+mini_c_parsert::tokenst mini_c_parsert::parse_declarator()
+{
+  // symbol
+  // ((...* symbol ...))
+
+  if(eof())
+    return {};
+
+  if(peek() == ';')
+    return {};
+
+  if(peek() == '(')
+  {
+    tokenst result;
+    parse_brackets('(', ')', result);
+    return result;
+  }
+  else if(is_identifier(peek()))
+  {
+    return {consume_token()};
+  }
+  else
+    throw invalid_source_file_exceptiont("expected an identifier");
+}
+
+/// Consumes the tokens that follow the declarator; the terminating token
+/// is left in the input.
+mini_c_parsert::tokenst mini_c_parsert::parse_post_declarator()
+{
+  // consume everything until we see one of the following:
+  // 1) ';' (end of declaration)
+  // 2) '{' (function body)
+  // 3) '=' (initializer)
+
+  tokenst result;
+
+  while(true)
+  {
+    if(eof())
+      return result;
+
+    if(peek() == ';' || peek() == '{' || peek() == '=')
+      return result;
+
+    result.push_back(consume_token());
+  }
+}
+
+/// Consumes what terminates a declaration: "= ... ;", a lone ';' or a
+/// brace-delimited body. Returns no tokens at eof.
+mini_c_parsert::tokenst mini_c_parsert::parse_initializer()
+{
+  if(eof())
+    return {};
+  else if(peek() == '=')
+  {
+    tokenst result;
+    while(true)
+    {
+      if(eof())
+        throw invalid_source_file_exceptiont("expected an initializer");
+      auto &token = consume_token();
+      result.push_back(token);
+      if(token == ';')
+        return result;
+    }
+  }
+  else if(peek() == ';')
+  {
+    // done
+    return {consume_token()};
+  }
+  else if(peek() == '{')
+  {
+    // function body
+    tokenst result;
+    std::size_t bracket_count = 0;
+    while(true)
+    {
+
if(eof())
+        throw invalid_source_file_exceptiont("eof in function body");
+      auto &token = consume_token();
+      result.push_back(token);
+      if(token == '{')
+        bracket_count++;
+      else if(token == '}')
+      {
+        bracket_count--;
+        if(bracket_count == 0)
+          return result;
+      }
+    }
+  }
+  else
+    PRECONDITION(false);
+}
+
+/// Parses one declaration by running the four sub-parsers in sequence.
+c_declarationt mini_c_parsert::parse_declaration()
+{
+  c_declarationt result;
+
+  result.pre_declarator = parse_pre_declarator();
+  result.declarator = parse_declarator();
+  result.post_declarator = parse_post_declarator();
+  result.initializer = parse_initializer();
+
+  return result;
+}
+
+/// Tokenizes \p in, keeping whitespace and comments, and splits the tokens
+/// into declarations.
+/// \return the declarations in input order; empty when the scanner yields
+///   no tokens at all
+c_translation_unitt mini_c_parsert::parse(std::istream &in)
+{
+  cscannert cscanner(in);
+  cscanner.return_WS_and_comments = true;
+  tokens = cscanner.get_tokens();
+  token_index = 0;
+
+  if(tokens.empty())
+    return {};
+
+  // eof() and the lookahead in the sub-parsers depend on a final eof token
+  DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");
+
+  c_translation_unitt result;
+
+  while(!eof())
+    result.push_back(parse_declaration());
+
+  return result;
+}
+
+/// Parses \p in using a fresh mini_c_parsert.
+c_translation_unitt parse_c(std::istream &in)
+{
+  return mini_c_parsert().parse(in);
+}
diff --git a/src/crangler/mini_c_parser.h b/src/crangler/mini_c_parser.h
new file mode 100644
index 00000000000..d543fabd0c8
--- /dev/null
+++ b/src/crangler/mini_c_parser.h
@@ -0,0 +1,44 @@
+/*******************************************************************\
+
+Module: Mini C Parser
+
+Author: Daniel Kroening, dkr@amazon.com
+
+\*******************************************************************/
+
+/// \file
+/// Mini C Parser
+
+#ifndef CPROVER_CRANGLER_MINI_C_PARSER_H
+#define CPROVER_CRANGLER_MINI_C_PARSER_H
+
+#include "cscanner.h"
+
+// NOTE(review): the header names of the next three includes are missing in
+// this copy of the patch -- restore them from the upstream file.
+#include
+#include
+
+#include
+
+/// The tokens of one declaration, split into four consecutive parts.
+struct c_declarationt
+{
+  // could be C++20 std::span to avoid copying
+  using tokenst = std::vector<ctokent>;
+
+  tokenst pre_declarator;
+  tokenst declarator;
+  tokenst post_declarator;
+  tokenst initializer;
+
+  void print(std::ostream &) const;
+  bool is_function() const;
+  bool has_body() const;
+  optionalt<ctokent> declared_identifier() const;
+};
+
+using c_translation_unitt = std::vector<c_declarationt>;
+
+c_translation_unitt parse_c(std::istream &);
+
+std::ostream &operator<<(std::ostream &, const c_declarationt &);
+
+#endif // CPROVER_CRANGLER_MINI_C_PARSER_H
diff --git a/src/crangler/module_dependencies.txt b/src/crangler/module_dependencies.txt
new file mode 100644
index 00000000000..fefb43a32e6
--- /dev/null
+++ b/src/crangler/module_dependencies.txt
@@ -0,0 +1,2 @@
+json
+util
diff --git a/src/crangler/scanner.l b/src/crangler/scanner.l
new file mode 100644
index 00000000000..d45027b3cbd
--- /dev/null
+++ b/src/crangler/scanner.l
@@ -0,0 +1,105 @@
+%option nounput
+%option noinput
+
+HexDigit [[:xdigit:]]
+UnicodeEscape \\u+{HexDigit}{HexDigit}{HexDigit}{HexDigit}
+OctalEscape \\([0-7]{1,2}|[0-3][0-7]{2})
+EscapeSequence \\[btnfr"'\\]|{OctalEscape}|{UnicodeEscape}
+LineTerminator \n|\r\n?
+WhiteSpace [ \t\f]+
+
+TraditionalComment "/*"([^*]|(\*+[^*/]))*\*+\/
+EndOfLineComment "//"[^\r\n]*
+
+IdentifierStart ([A-Z]|[a-z]|"_"|"$"|[\x80-\xff])
+IdentifierPart ({IdentifierStart}|[0-9])
+
+Identifier {IdentifierStart}{IdentifierPart}*
+
+IntegerTypeSuffix [lL]?
+
+DecimalNumeral 0|[1-9][0-9_]*
+DecimalIntegerLiteral {DecimalNumeral}{IntegerTypeSuffix}?
+
+HexNumeral 0[xX][[:xdigit:]_]+
+HexIntegerLiteral {HexNumeral}{IntegerTypeSuffix}?
+
+OctalNumeral 0[0-7_]+
+OctalIntegerLiteral {OctalNumeral}{IntegerTypeSuffix}?
+
+BinaryNumeral 0[bB][01_]+
+BinaryIntegerLiteral {BinaryNumeral}{IntegerTypeSuffix}?
+
+IntegerLiteral {DecimalIntegerLiteral}|{HexIntegerLiteral}|{OctalIntegerLiteral}|{BinaryIntegerLiteral}
+
+/* the sign of an exponent is optional in C, e.g., 1e10 or 0x1.8p3 */
+FloatTypeSuffix [fFdD]
+DecimalFloatingPointLiteral ([0-9_]+\.?[0-9_]*|\.[0-9_]+)([eE][-+]?{DecimalNumeral}+)?{FloatTypeSuffix}?
+HexadecimalFloatingPointLiteral 0[xX][[:xdigit:]_]*\.[[:xdigit:]_]*[pP][-+]?{DecimalNumeral}+{FloatTypeSuffix}?
+FloatingPointLiteral {DecimalFloatingPointLiteral}|{HexadecimalFloatingPointLiteral}
+
+CharacterLiteral '([^'\\\n]|{EscapeSequence})'
+
+StringLiteral \"([^"\\\n]|{EscapeSequence})*\"
+
+Separator [(){}\[\];,.@]|"..."|::
+
+Operator [-+=<>*/&|^%!~?:]|[-=<>!+*/&|^%]=|&&|"||"|"++"|--|<<=?|>>>?=?
+
+PreprocessorStart {WhiteSpace}*"#"{WhiteSpace}*
+PreprocessorDirective {PreprocessorStart}[^\r\n]*
+
+%{
+#ifdef _WIN32
+#define YY_NO_UNISTD_H
+static int isatty(int) { return 0; }
+#endif
+
+// NOTE(review): the header names of the next four includes are missing in
+// this copy of the patch -- restore them from the upstream file.
+#include
+
+#include
+#include
+#include
+
+#include "cscanner.h"
+
+// Input is read from the stream of the active scanner object; a stream in
+// a failed state is reported to the lexer as end of input.
+#define YY_INPUT(buf, result, max_size) \
+    do { \
+        if(!cscanner_ptr->in) \
+          result=YY_NULL; \
+        else \
+        { \
+          cscanner_ptr->in.read(buf, max_size); \
+          result = cscanner_ptr->in.gcount(); \
+        } \
+    } while(0)
+
+// records the matched text with the scanner object and yields the kind
+#define token(t) (cscanner_ptr->set_token(yytext, ctokent::t), ctokent::t)
+%}
+
+%%
+
+{IntegerLiteral} return token(INT_LIT);
+{FloatingPointLiteral} return token(FLOAT_LIT);
+{CharacterLiteral} return token(CHAR_LIT);
+{StringLiteral} return token(STRING_LIT);
+{Separator} return token(SEPARATOR);
+{Operator} return token(OPERATOR);
+{Identifier} return token(IDENTIFIER);
+{PreprocessorDirective} return token(PREPROCESSOR_DIRECTIVE);
+{TraditionalComment} { for(const char *t = yytext; *t!=0; t++) // need to count newlines
+                         if(*t=='\n') cscanner_ptr->line_number++;
+                       if(cscanner_ptr->return_WS_and_comments) return token(C_COMMENT); }
+{EndOfLineComment} { if(cscanner_ptr->return_WS_and_comments) return token(CPP_COMMENT); }
+{WhiteSpace} if(cscanner_ptr->return_WS_and_comments) return token(WS);
+{LineTerminator} { cscanner_ptr->line_number++; if(cscanner_ptr->return_WS_and_comments) return token(WS); }
+<<EOF>> return token(END_OF_FILE);
+. return token(UNKNOWN);
+
+%%
+
+/// Puts the lexer into its INITIAL start condition.
+void initialize_yyc_scanner()
+{
+  BEGIN(INITIAL);
+}
+
+/// Returning 1 tells the lexer that there is no further input after eof.
+int yywrap() { return 1; }