Skip to content

Commit 6807bed

Browse files
Merge pull request #1251 from LAJW/refactor/jar_file
Refactor, fix and document jar_filet class
2 parents 22a4243 + a1cf348 commit 6807bed

File tree

8 files changed

+270
-219
lines changed

8 files changed

+270
-219
lines changed

src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ SRC = bytecode_info.cpp \
2525
java_string_library_preprocess.cpp \
2626
java_types.cpp \
2727
java_utils.cpp \
28+
mz_zip_archive.cpp \
2829
select_pointer_type.cpp \
2930
# Empty last line
3031

src/java_bytecode/jar_file.cpp

Lines changed: 68 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1,144 +1,100 @@
11
/*******************************************************************\
22
3-
Module:
3+
Module: Jar file reader
44
5-
Author: Daniel Kroening, [email protected]
5+
Author: Diffblue Ltd
66
77
\*******************************************************************/
88

99
#include "jar_file.h"
10-
11-
#include <cstring>
12-
#include <unordered_set>
13-
14-
#include <json/json_parser.h>
10+
#include <cctype>
1511
#include <util/suffix.h>
1612
#include <util/invariant.h>
13+
#include "java_class_loader_limit.h"
1714

18-
void jar_filet::open(
19-
java_class_loader_limitt &class_loader_limit,
20-
const std::string &filename)
15+
jar_filet::jar_filet(
16+
java_class_loader_limitt &limit,
17+
const std::string &filename):
18+
m_zip_archive(filename)
2119
{
22-
if(!mz_ok)
20+
const size_t file_count=m_zip_archive.get_num_files();
21+
for(size_t index=0; index<file_count; index++)
2322
{
24-
memset(&zip, 0, sizeof(zip));
25-
mz_bool mz_open=mz_zip_reader_init_file(&zip, filename.c_str(), 0);
26-
mz_ok=mz_open==MZ_TRUE;
23+
const auto filename=m_zip_archive.get_filename(index);
24+
if(!has_suffix(filename, ".class") || limit.load_class_file(filename))
25+
m_name_to_index.emplace(filename, index);
2726
}
27+
}
2828

29-
if(mz_ok)
30-
{
31-
std::size_t number_of_files=
32-
mz_zip_reader_get_num_files(&zip);
33-
34-
for(std::size_t i=0; i<number_of_files; i++)
35-
{
36-
// get the length of the filename, including the trailing \0
37-
mz_uint filename_length=mz_zip_reader_get_filename(&zip, i, nullptr, 0);
38-
std::vector<char> filename_char(filename_length+1);
39-
INVARIANT(filename_length>=1, "buffer size must include trailing \\0");
29+
// VS: No default move constructors or assigns
4030

41-
// read and convert to std::string
42-
mz_uint filename_len=
43-
mz_zip_reader_get_filename(
44-
&zip, i, filename_char.data(), filename_length);
45-
INVARIANT(
46-
filename_length==filename_len,
47-
"buffer size was incorrectly pre-computed");
48-
std::string file_name(filename_char.data());
49-
#if DEBUG
50-
debug()
51-
<< "jar_filet.open: idx " << i
52-
<< " len " << filename_len
53-
<< " filename '" << std::string(filename_char.data()) << "'" << eom;
54-
#endif
55-
INVARIANT(file_name.size()==filename_len-1, "no \\0 found in file name");
31+
jar_filet::jar_filet(jar_filet &&other):
32+
m_zip_archive(std::move(other.m_zip_archive)),
33+
m_name_to_index((other.m_name_to_index)) {}
5634

57-
// non-class files are loaded in any case
58-
bool add_file=!has_suffix(file_name, ".class");
59-
// load .class file only if they match regex / are in match set
60-
add_file|=class_loader_limit.load_class_file(file_name);
61-
if(add_file)
62-
{
63-
if(has_suffix(file_name, ".class"))
64-
status() << "read class file " << file_name
65-
<< " from " << filename << eom;
66-
filtered_jar[file_name]=i;
67-
}
68-
}
69-
}
35+
jar_filet &jar_filet::operator=(jar_filet &&other)
36+
{
37+
m_zip_archive=std::move(other.m_zip_archive);
38+
m_name_to_index=std::move(other.m_name_to_index);
39+
return *this;
7040
}
7141

72-
jar_filet::~jar_filet()
42+
std::string jar_filet::get_entry(const std::string &name)
7343
{
74-
if(mz_ok)
44+
const auto entry=m_name_to_index.find(name);
45+
INVARIANT(entry!=m_name_to_index.end(), "File doesn't exist");
46+
try
47+
{
48+
return m_zip_archive.extract(entry->second);
49+
}
50+
catch(const std::runtime_error &)
7551
{
76-
mz_zip_reader_end(&zip);
77-
mz_ok=false;
52+
return "";
7853
}
7954
}
8055

81-
std::string jar_filet::get_entry(const irep_idt &name)
56+
static bool is_space(const char ch)
8257
{
83-
if(!mz_ok)
84-
return std::string("");
85-
86-
std::string dest;
87-
88-
auto entry=filtered_jar.find(name);
89-
assert(entry!=filtered_jar.end());
90-
91-
size_t real_index=entry->second;
92-
mz_zip_archive_file_stat file_stat;
93-
memset(&file_stat, 0, sizeof(file_stat));
94-
mz_bool stat_ok=mz_zip_reader_file_stat(&zip, real_index, &file_stat);
95-
if(stat_ok!=MZ_TRUE)
96-
return std::string();
97-
std::vector<char> buffer;
98-
size_t bufsize=file_stat.m_uncomp_size;
99-
buffer.resize(bufsize);
100-
mz_bool read_ok=
101-
mz_zip_reader_extract_to_mem(&zip, real_index, buffer.data(), bufsize, 0);
102-
if(read_ok!=MZ_TRUE)
103-
return std::string();
104-
105-
dest.insert(dest.end(), buffer.begin(), buffer.end());
106-
107-
return dest;
58+
return std::isspace(ch);
10859
}
10960

110-
jar_filet::manifestt jar_filet::get_manifest()
61+
/// Remove leading and trailing whitespace characters from string
62+
static std::string trim(
63+
const std::string::const_iterator begin,
64+
const std::string::const_iterator end)
11165
{
112-
auto entry=filtered_jar.find("META-INF/MANIFEST.MF");
113-
if(entry==filtered_jar.end())
114-
return manifestt();
115-
116-
std::string dest=get_entry(entry->first);
117-
std::istringstream in(dest);
118-
119-
manifestt manifest;
66+
const auto out_begin=std::find_if_not(begin, end, is_space);
67+
const auto out_end=std::find_if_not(
68+
std::string::const_reverse_iterator(end),
69+
std::string::const_reverse_iterator(out_begin),
70+
is_space).base();
71+
return { out_begin, out_end };
72+
}
12073

121-
std::string line;
122-
while(std::getline(in, line))
74+
std::unordered_map<std::string, std::string> jar_filet::get_manifest()
75+
{
76+
std::unordered_map<std::string, std::string> out;
77+
const auto entry=m_name_to_index.find("META-INF/MANIFEST.MF");
78+
if(entry!=m_name_to_index.end())
12379
{
124-
std::size_t pos=line.find(':');
125-
if(pos==std::string::npos)
126-
continue;
127-
std::string key=line.substr(0, pos);
128-
129-
// skip spaces
130-
pos++;
131-
while(pos<line.size() && line[pos]==' ') pos++;
132-
133-
std::string value=line.substr(pos, std::string::npos);
134-
135-
// trim off \r
136-
if(!value.empty() && *value.rbegin()=='\r')
137-
value.resize(value.size()-1);
138-
139-
// store
140-
manifest[key]=value;
80+
std::istringstream in(this->get_entry(entry->first));
81+
std::string line;
82+
while(std::getline(in, line))
83+
{
84+
const auto key_end=std::find(line.cbegin(), line.cend(), ':');
85+
if(key_end!=line.cend())
86+
out.emplace(
87+
trim(line.cbegin(), key_end),
88+
trim(std::next(key_end), line.cend()));
89+
}
14190
}
91+
return out;
92+
}
14293

143-
return manifest;
94+
std::vector<std::string> jar_filet::filenames() const
95+
{
96+
std::vector<std::string> out;
97+
for(const auto &pair : m_name_to_index)
98+
out.emplace_back(pair.first);
99+
return out;
144100
}

src/java_bytecode/jar_file.h

Lines changed: 30 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,110 +1,48 @@
11
/*******************************************************************\
22
3-
Module: JAR File Reading
3+
Module: Jar file reader
44
5-
Author: Daniel Kroening, [email protected]
5+
Author: Diffblue Ltd
66
77
\*******************************************************************/
88

9-
/// \file
10-
/// JAR File Reading
11-
129
#ifndef CPROVER_JAVA_BYTECODE_JAR_FILE_H
1310
#define CPROVER_JAVA_BYTECODE_JAR_FILE_H
1411

15-
#define _LARGEFILE64_SOURCE 1
16-
#include "miniz/miniz.h"
17-
12+
#include <unordered_map>
13+
#include <memory>
1814
#include <string>
1915
#include <vector>
20-
#include <map>
21-
#include <regex>
22-
#include <util/message.h>
16+
#include "mz_zip_archive.h"
2317

24-
#include "java_class_loader_limit.h"
18+
class java_class_loader_limitt;
2519

26-
/// An in-memory representation of a JAR file, consisting of a handler to a zip
27-
/// file (field \ref zip) and a map from file names inside the zip file to the
28-
/// index they occupy in the root directory (field \ref filtered_jar).
29-
///
30-
/// Both fields are initialize by a call to open(). Method get_entry() reads a
31-
/// file from the zip and returns it.
32-
class jar_filet:public messaget
20+
/// Class representing a .jar archive
21+
class jar_filet final
3322
{
3423
public:
35-
jar_filet():
36-
mz_ok(false)
37-
// `zip` will be initialized by open()
38-
{
39-
}
40-
41-
~jar_filet();
42-
43-
/// Initializes \ref zip to store the JAR file pointed by \p filepath and
44-
/// loads the map \ref filtered_jar with the .class names allowed by \p limit.
45-
void open(java_class_loader_limitt &limit, const std::string &filepath);
46-
47-
/// Test for error; 'true' means we are good.
48-
explicit operator bool() const { return mz_ok; }
49-
50-
/// Reads the \ref zip field and returns a string storing the data contained
51-
/// in file \p name.
52-
std::string get_entry(const irep_idt &name);
53-
54-
/// Maps the names of the files stored in the JAR file to the index they
55-
/// occupy (starting from 0) in the JAR central directory. Populated by
56-
/// method open() above.
57-
typedef std::map<irep_idt, size_t> filtered_jart;
58-
filtered_jart filtered_jar;
59-
60-
typedef std::map<std::string, std::string> manifestt;
61-
manifestt get_manifest();
62-
63-
protected:
64-
/// A handle representing the zip file
65-
mz_zip_archive zip;
66-
67-
/// True iff the \ref zip field has been correctly initialized with a JAR file
68-
bool mz_ok;
69-
};
70-
71-
/// A pool of jar_filet objects, indexed by the filesystem path name used to
72-
/// load that jar_filet.
73-
///
74-
/// The state of the class is maintained by the field \ref file_map, a std::map
75-
/// associating the path of a .jar file with its in-memory representation.
76-
/// A call to
77-
///
78-
/// ```
79-
/// operator()(limit, path)
80-
/// ```
81-
///
82-
/// will either return a previously loaded jar_filet, if it is found in the map,
83-
/// or will load that file (using jar_filet::open) and return a newly created
84-
/// jar_filet.
85-
class jar_poolt:public messaget
86-
{
87-
public:
88-
jar_filet &operator()(
89-
java_class_loader_limitt &class_loader_limit,
90-
const std::string &file_name)
91-
{
92-
file_mapt::iterator it=file_map.find(file_name);
93-
if(it==file_map.end())
94-
{
95-
jar_filet &jar_file=file_map[file_name];
96-
jar_file.set_message_handler(get_message_handler());
97-
jar_file.open(class_loader_limit, file_name);
98-
return jar_file;
99-
}
100-
else
101-
return file_map[file_name];
102-
}
103-
104-
protected:
105-
/// A map from filesystem paths to jar_filet objects
106-
typedef std::map<std::string, jar_filet> file_mapt;
107-
file_mapt file_map;
24+
/// Open java file for reading
25+
/// \param limit Object limiting number of loaded .class files
26+
/// \param filename Name of the file
27+
/// \throw Throws std::runtime_error if file cannot be opened
28+
jar_filet(java_class_loader_limitt &limit, const std::string &filename);
29+
jar_filet(const jar_filet &)=delete;
30+
jar_filet &operator=(const jar_filet &)=delete;
31+
jar_filet(jar_filet &&);
32+
jar_filet &operator=(jar_filet &&);
33+
~jar_filet()=default;
34+
/// Get contents of a file in the jar archive.
35+
/// Terminates the program if file doesn't exist
36+
/// \param filename Name of the file in the archive
37+
std::string get_entry(const std::string &filename);
38+
/// Get contents of the Manifest file in the jar archive
39+
std::unordered_map<std::string, std::string> get_manifest();
40+
/// Get list of filenames in the archive
41+
std::vector<std::string> filenames() const;
42+
private:
43+
mz_zip_archivet m_zip_archive;
44+
/// Map of filename to the file index in the zip archive
45+
std::unordered_map<std::string, size_t> m_name_to_index;
10846
};
10947

11048
#endif // CPROVER_JAVA_BYTECODE_JAR_FILE_H

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ bool java_bytecode_languaget::parse(
138138
java_cp_include_files);
139139
if(config.java.main_class.empty())
140140
{
141-
// load the .jar file and retrieve its manifest
142-
jar_filet::manifestt manifest=
141+
auto manifest=
143142
java_class_loader.jar_pool(class_loader_limit, path).get_manifest();
144143
std::string manifest_main_class=manifest["Main-Class"];
145144

0 commit comments

Comments
 (0)