Skip to content

Limit class loading #604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added regression/cbmc-java/jar-file4/A.jar
Binary file not shown.
Binary file added regression/cbmc-java/jar-file4/B.jar
Binary file not shown.
Binary file added regression/cbmc-java/jar-file4/C.jar
Binary file not shown.
12 changes: 12 additions & 0 deletions regression/cbmc-java/jar-file4/jar.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"jar":
[
"A.jar",
"B.jar"
],
"classFiles":
[
"jarfile3$A.class",
"jarfile3.class"
]
}
10 changes: 10 additions & 0 deletions regression/cbmc-java/jar-file4/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
C.jar
--function jarfile3.f --java-cp-include-files "@jar.json"
^EXIT=10$
^SIGNAL=0$
.*SUCCESS$
.*FAILURE$
^VERIFICATION FAILED
--
^warning: ignoring
3 changes: 2 additions & 1 deletion src/cbmc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../assembler/assembler$(LIBEXT) \
../solvers/solvers$(LIBEXT) \
../util/util$(LIBEXT) \
../miniz/miniz$(OBJEXT)
../miniz/miniz$(OBJEXT) \
../json/json$(LIBEXT)

INCLUDES= -I ..

Expand Down
1 change: 1 addition & 0 deletions src/cbmc/cbmc_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,7 @@ void cbmc_parse_optionst::help()
// NOLINTNEXTLINE(whitespace/line_length)
" --java-max-vla-length limit the length of user-code-created arrays\n"
// NOLINTNEXTLINE(whitespace/line_length)
" --java-cp-include-files regexp or JSON list of files to load (with '@' prefix)\n"
" --java-unwind-enum-static try to unwind loops in static initialization of enums\n"
"\n"
"Semantic transformations:\n"
Expand Down
1 change: 1 addition & 0 deletions src/cbmc/cbmc_parse_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class optionst;
"(round-to-nearest)(round-to-plus-inf)(round-to-minus-inf)(round-to-zero)" \
"(graphml-witness):" \
"(java-max-vla-length):(java-unwind-enum-static)" \
"(java-cp-include-files):" \
"(localize-faults)(localize-faults-method):" \
"(lazy-methods)" \
"(fixedbv)(floatbv)(all-claims)(all-properties)" // legacy, and will eventually disappear // NOLINT(whitespace/line_length)
Expand Down
3 changes: 2 additions & 1 deletion src/cegis/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../cbmc/cbmc_dimacs$(OBJEXT) ../cbmc/all_properties$(OBJEXT) \
../cbmc/fault_localization$(OBJEXT) \
../cbmc/symex_coverage$(OBJEXT) \
../miniz/miniz$(OBJEXT)
../miniz/miniz$(OBJEXT) \
../json/json$(LIBEXT)

INCLUDES= -I ..

Expand Down
3 changes: 2 additions & 1 deletion src/goto-cc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ OBJ += ../big-int/big-int$(LIBEXT) \
../xmllang/xmllang$(LIBEXT) \
../assembler/assembler$(LIBEXT) \
../langapi/langapi$(LIBEXT) \
../miniz/miniz$(OBJEXT)
../miniz/miniz$(OBJEXT) \
../json/json$(LIBEXT)

INCLUDES= -I ..

Expand Down
3 changes: 2 additions & 1 deletion src/goto-diff/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../xmllang/xmllang$(LIBEXT) \
../util/util$(LIBEXT) \
../solvers/solvers$(LIBEXT) \
../miniz/miniz$(OBJEXT)
../miniz/miniz$(OBJEXT) \
../json/json$(LIBEXT)

INCLUDES= -I ..

Expand Down
3 changes: 2 additions & 1 deletion src/goto-instrument/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../xmllang/xmllang$(LIBEXT) \
../util/util$(LIBEXT) \
../solvers/solvers$(LIBEXT) \
../miniz/miniz$(OBJEXT)
../miniz/miniz$(OBJEXT) \
../json/json$(LIBEXT)

INCLUDES= -I ..

Expand Down
87 changes: 60 additions & 27 deletions src/java_bytecode/jar_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ Author: Daniel Kroening, [email protected]

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this assertion holds anymore.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you refer to assert(i<index.size())? This does hold, index holds at most as many elements as the central directory and the entry i is translated vie the filtered_jart

#include <cstring>
#include <cassert>
#include <sstream>
#include <unordered_set>

#include <json/json_parser.h>
#include <util/suffix.h>
#include "jar_file.h"

/*******************************************************************\

Function: jar_filet::open
Expand All @@ -24,7 +25,9 @@ Function: jar_filet::open

\*******************************************************************/

void jar_filet::open(const std::string &filename)
void jar_filet::open(
std::string &java_cp_include_files,
const std::string &filename)
{
if(!mz_ok)
{
Expand All @@ -35,11 +38,37 @@ void jar_filet::open(const std::string &filename)

if(mz_ok)
{
// '@' signals file reading with list of class files to load
bool regex_match=java_cp_include_files[0]!='@';
std::regex regex_matcher;
std::smatch string_matcher;
std::unordered_set<std::string> set_matcher;
jsont json_cp_config;
if(regex_match)
regex_matcher=std::regex(java_cp_include_files);
else
{
assert(java_cp_include_files.length()>1);
if(parse_json(
java_cp_include_files.substr(1),
get_message_handler(),
json_cp_config))
throw "cannot read JSON input configuration for JAR loading";
if(!json_cp_config.is_object())
throw "the JSON file has a wrong format";
jsont include_files=json_cp_config["classFiles"];
if(!include_files.is_array())
throw "the JSON file has a wrong format";
for(const jsont &file_entry : include_files.array)
{
assert(file_entry.is_string());
set_matcher.insert(file_entry.value);
}
}

std::size_t number_of_files=
mz_zip_reader_get_num_files(&zip);

index.reserve(number_of_files);

for(std::size_t i=0; i<number_of_files; i++)
{
mz_uint filename_length=mz_zip_reader_get_filename(&zip, i, nullptr, 0);
Expand All @@ -48,8 +77,23 @@ void jar_filet::open(const std::string &filename)
mz_zip_reader_get_filename(&zip, i, filename_char, filename_length);
assert(filename_length==filename_len);
std::string file_name(filename_char);

// non-class files are loaded in any case
bool add_file=!has_suffix(file_name, ".class");
// load .class file only if they match regex
if(regex_match)
add_file|=std::regex_match(file_name, string_matcher, regex_matcher);
// load .class file only if it is in the match set
else
add_file|=set_matcher.find(file_name)!=set_matcher.end();
if(add_file)
{
if(has_suffix(file_name, ".class"))
status() << "read class file " << file_name
<< " from " << filename << eom;
filtered_jar[file_name]=i;
}
delete[] filename_char;
index.push_back(file_name);
}
}
}
Expand Down Expand Up @@ -87,25 +131,27 @@ Function: jar_filet::get_entry

\*******************************************************************/

std::string jar_filet::get_entry(std::size_t i)
std::string jar_filet::get_entry(const irep_idt &name)
{
if(!mz_ok)
return std::string("");

assert(i<index.size());

std::string dest;

auto entry=filtered_jar.find(name);
assert(entry!=filtered_jar.end());

size_t real_index=entry->second;
mz_zip_archive_file_stat file_stat;
memset(&file_stat, 0, sizeof(file_stat));
mz_bool stat_ok=mz_zip_reader_file_stat(&zip, i, &file_stat);
mz_bool stat_ok=mz_zip_reader_file_stat(&zip, real_index, &file_stat);
if(stat_ok!=MZ_TRUE)
return std::string();
std::vector<char> buffer;
size_t bufsize=file_stat.m_uncomp_size;
buffer.resize(bufsize);
mz_bool read_ok=
mz_zip_reader_extract_to_mem(&zip, i, buffer.data(), bufsize, 0);
mz_zip_reader_extract_to_mem(&zip, real_index, buffer.data(), bufsize, 0);
if(read_ok!=MZ_TRUE)
return std::string();

Expand All @@ -128,24 +174,11 @@ Function: jar_filet::get_manifest

jar_filet::manifestt jar_filet::get_manifest()
{
std::size_t i=0;
bool found=false;

for(const auto &e : index)
{
if(e=="META-INF/MANIFEST.MF")
{
found=true;
break;
}

i++;
}

if(!found)
auto entry=filtered_jar.find("META-INF/MANIFEST.MF");
if(entry==filtered_jar.end())
return manifestt();

std::string dest=get_entry(i);
std::string dest=get_entry(entry->first);
std::istringstream in(dest);

manifestt manifest;
Expand Down
34 changes: 20 additions & 14 deletions src/java_bytecode/jar_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,26 @@ Author: Daniel Kroening, [email protected]
#include <string>
#include <vector>
#include <map>
#include <regex>
#include <util/message.h>

class jar_filet
class jar_filet:public messaget
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is messaget made use of, or initialized?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the JSON parser needs a valid message_handler and I also had wanted to give a status() about hidden / loaded files (why I will add)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, but then you'll need to provide a message_handlert to configure things properly. Only that message_handlert coming in would have proper configuration in terms of verbosity and the way information should be printed. The code invoking jar_filet will have that, so it shouldn't be hard to do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this trickles down, at least that's what I intended; the class loader sets the message handler of the jar_poolt

  void set_java_cp_include_files(std::string &java_cp_include_files)
  {
    jar_pool.set_java_cp_include_files(java_cp_include_files);
    jar_pool.set_message_handler(get_message_handler());
  }

and the jar_poolt sets the handler of the jar_filet

  jar_filet &operator()(const std::string &file_name)
  {
    if(java_cp_include_files.empty())
      throw "class regexp cannot be empty";
    file_mapt::iterator it=file_map.find(file_name);
    if(it==file_map.end())
    {
      jar_filet &jar_file=file_map[file_name];
      jar_file.set_message_handler(get_message_handler());
      jar_file.open(java_cp_include_files, file_name);
      return jar_file;
    }
    else
      return file_map[file_name];
  }

Out of laziness, I do not put it in the constructor. Also having an explicit constructor for jar_poolt raises errors as the implicit default constructor would disappear, which is used in the code. Therefore this approach has the least impact, and jar_filet/jar_poolt is only used here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thanks a lot for clarifying!

{
public:
jar_filet():mz_ok(false) { }

inline explicit jar_filet(const std::string &file_name)
{
open(file_name);
}

~jar_filet();

void open(const std::string &);
void open(std::string &java_cp_include_files, const std::string &);

// Test for error; 'true' means we are good.
inline explicit operator bool() const { return true; // TODO
}
explicit operator bool() const { return mz_ok; }

typedef std::vector<std::string> indext;
indext index;
// map internal index to real index in jar central directory
typedef std::map<irep_idt, size_t> filtered_jart;
filtered_jart filtered_jar;

std::string get_entry(std::size_t i);
std::string get_entry(const irep_idt &);

typedef std::map<std::string, std::string> manifestt;
manifestt get_manifest();
Expand All @@ -47,16 +44,24 @@ class jar_filet
bool mz_ok;
};

class jar_poolt
class jar_poolt:public messaget
{
public:
void set_java_cp_include_files(std::string &_java_cp_include_files)
{
java_cp_include_files=_java_cp_include_files;
}

jar_filet &operator()(const std::string &file_name)
{
if(java_cp_include_files.empty())
throw "class regexp cannot be empty";
file_mapt::iterator it=file_map.find(file_name);
if(it==file_map.end())
{
jar_filet &jar_file=file_map[file_name];
jar_file.open(file_name);
jar_file.set_message_handler(get_message_handler());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does jar_filet benefit from being a messaget (see comments above)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a status output to see about hidden files, also the JSON parser needs a valid one

jar_file.open(java_cp_include_files, file_name);
return jar_file;
}
else
Expand All @@ -66,6 +71,7 @@ class jar_poolt
protected:
typedef std::map<std::string, jar_filet> file_mapt;
file_mapt file_map;
std::string java_cp_include_files;
};

#endif // CPROVER_JAVA_BYTECODE_JAR_FILE_H
32 changes: 32 additions & 0 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Author: Daniel Kroening, [email protected]
#include <util/config.h>
#include <util/cmdline.h>
#include <util/string2int.h>
#include <json/json_parser.h>

#include <goto-programs/class_hierarchy.h>

Expand Down Expand Up @@ -55,6 +56,36 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_INSENSITIVE;
else
lazy_methods_mode=LAZY_METHODS_MODE_EAGER;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a blank line. Codewithoutblanksisreallyhardtoread.


if(cmd.isset("java-cp-include-files"))
{
java_cp_include_files=cmd.get_value("java-cp-include-files");
// load file list from JSON file
if(java_cp_include_files[0]=='@')
{
jsont json_cp_config;
if(parse_json(
java_cp_include_files.substr(1),
get_message_handler(),
json_cp_config))
throw "cannot read JSON input configuration for JAR loading";

if(!json_cp_config.is_object())
throw "the JSON file has a wrong format";
jsont include_files=json_cp_config["jar"];
if(!include_files.is_array())
throw "the JSON file has a wrong format";

// add jars from JSON config file to classpath
for(const jsont &file_entry : include_files.array)
{
assert(file_entry.is_string() && has_suffix(file_entry.value, ".jar"));
config.java.classpath.push_back(file_entry.value);
}
}
}
else
java_cp_include_files=".*";
}

/*******************************************************************\
Expand Down Expand Up @@ -129,6 +160,7 @@ bool java_bytecode_languaget::parse(
const std::string &path)
{
java_class_loader.set_message_handler(get_message_handler());
java_class_loader.set_java_cp_include_files(java_cp_include_files);

// look at extension
if(has_suffix(path, ".class"))
Expand Down
1 change: 1 addition & 0 deletions src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class java_bytecode_languaget:public languaget
lazy_methodst lazy_methods;
lazy_methods_modet lazy_methods_mode;
bool string_refinement_enabled;
std::string java_cp_include_files;
};

languaget *new_java_bytecode_language();
Expand Down
Loading