diff --git a/.travis.yml b/.travis.yml
index fbd3915b515..c702dbd8f4d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,10 +29,12 @@ matrix:
             - libwww-perl
             - clang-3.7
             - libubsan0
+            - libc++-dev
+            - libstdc++-5-dev
       before_install:
         - mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc
       # env: COMPILER=clang++-3.7 SAN_FLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined,integer -fno-omit-frame-pointer"
-      env: COMPILER=clang++-3.7
+      env: COMPILER="clang++-3.7 -stdlib=libc++"
     - os: osx
       compiler: gcc
       env: COMPILER=g++
@@ -45,6 +47,6 @@ matrix:
 script:
   - if [ -L bin/gcc ] ; then export PATH=$PWD/bin:$PATH ; fi ;
     make -C src minisat2-download &&
-    make -C src CXX=$COMPILER CXXFLAGS="-Wall -O2 -g -Werror -Wno-deprecated-register -pedantic -Wno-sign-compare" -j2 &&
+    make -C src CXX="$COMPILER" CXXFLAGS="-Wall -O2 -g -Werror -Wno-deprecated-register -pedantic -Wno-sign-compare" -j2 &&
     env UBSAN_OPTIONS=print_stacktrace=1 make -C regression test &&
-    make -C src CXX=$COMPILER CXXFLAGS=$FLAGS -j2 aa-symex.dir cegis.dir clobber.dir memory-models.dir musketeer.dir
+    make -C src CXX="$COMPILER" CXXFLAGS="-Wall -O2 -g -Werror -Wno-deprecated-register -pedantic -Wno-sign-compare" -j2 aa-symex.dir cegis.dir clobber.dir memory-models.dir musketeer.dir
diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp
index 4502605c600..12ea8d04a0f 100644
--- a/src/util/unicode.cpp
+++ b/src/util/unicode.cpp
@@ -7,6 +7,8 @@ Author: Daniel Kroening, kroening@kroening.com
 \*******************************************************************/
 
 #include <cstring>
+#include <locale>
+#include <codecvt>
 
 #include "unicode.h"
 
@@ -253,3 +255,73 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
 
   return argv_narrow;
 }
+
+/// Convert a UTF-8 string into UTF-16 code units held in a std::wstring,
+/// using the default conversion mode of std::codecvt_utf8_utf16.
+/// \param in: UTF-8 encoded input
+/// \return the converted wide string
+/// \throws std::range_error (raised by std::wstring_convert) on bad input
+std::wstring utf8_to_utf16_big_endian(const std::string& in)
+{
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t> > converter;
+  return converter.from_bytes(in);
+}
+
+/// Convert a UTF-8 string into UTF-16 code units held in a std::wstring,
+/// requesting std::little_endian mode from std::codecvt_utf8_utf16.
+/// \param in: UTF-8 encoded input
+/// \return the converted wide string
+/// \throws std::range_error (raised by std::wstring_convert) on bad input
+std::wstring utf8_to_utf16_little_endian(const std::string& in)
+{
+  const std::codecvt_mode mode=std::codecvt_mode::little_endian;
+
+  // default largest value codecvt_utf8_utf16 reads without error is 0x10ffff
+  // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16
+  const unsigned long maxcode=0x10ffff;
+
+  typedef std::codecvt_utf8_utf16<wchar_t, maxcode, mode> codecvt_utf8_utf16t;
+  std::wstring_convert<codecvt_utf8_utf16t> converter;
+  return converter.from_bytes(in);
+}
+
+/// Append one UTF-16 code unit to the output as a backslash-u escape
+/// followed by four lower-case hexadecimal digits.
+static void append_utf16_escape(std::string &result, unsigned long unit)
+{
+  static const char hex_digits[]="0123456789abcdef";
+
+  result+="\\u";
+  for(int shift=12; shift>=0; shift-=4)
+    result+=hex_digits[(unit>>shift)&0xf];
+}
+
+/// Render UTF-16 code units as 7-bit ASCII text. Printable ASCII characters
+/// (0x20 to 0x7e) are copied unchanged; every other code unit is written as
+/// a backslash-u escape with four hexadecimal digits.
+/// \param in: wide string holding UTF-16 code units
+/// \return the ASCII-only rendering of the input
+std::string utf16_little_endian_to_ascii(const std::wstring& in)
+{
+  std::string result;
+
+  for(const auto c : in)
+  {
+    const unsigned long code=static_cast<unsigned long>(c);
+
+    if(code>=0x20 && code<=0x7e)
+      result+=static_cast<char>(code);
+    else if(code>0xffff && code<=0x10ffff)
+    {
+      // Only possible where wchar_t is wider than 16 bits: emit a surrogate
+      // pair rather than truncating the hexadecimal digits.
+      const unsigned long offset=code-0x10000;
+      append_utf16_escape(result, 0xd800+(offset>>10));
+      append_utf16_escape(result, 0xdc00+(offset&0x3ff));
+    }
+    else
+      append_utf16_escape(result, code&0xffff);
+  }
+
+  return result;
+}
diff --git a/src/util/unicode.h b/src/util/unicode.h
index edad95039f0..1e5040344d0 100644
--- a/src/util/unicode.h
+++ b/src/util/unicode.h
@@ -22,6 +22,10 @@ std::wstring widen(const std::string &s);
 std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
 std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
 
+std::wstring utf8_to_utf16_big_endian(const std::string&);
+std::wstring utf8_to_utf16_little_endian(const std::string&);
+std::string utf16_little_endian_to_ascii(const std::wstring& in);
+
 const char **narrow_argv(int argc, const wchar_t **argv_wide);
 
 #endif // CPROVER_UTIL_UNICODE_H