|
1 | 1 | /*******************************************************************\
|
2 | 2 |
|
3 |
| -Module: |
| 3 | +Module: Jar file reader |
4 | 4 |
|
5 |
| -Author: Daniel Kroening, [email protected] |
| 5 | +Author: Diffblue Ltd |
6 | 6 |
|
7 | 7 | \*******************************************************************/
|
8 | 8 |
|
9 | 9 | #include "jar_file.h"
|
10 |
| - |
11 |
| -#include <cstring> |
12 |
| -#include <unordered_set> |
13 |
| - |
14 |
| -#include <json/json_parser.h> |
| 10 | +#include <cctype> |
15 | 11 | #include <util/suffix.h>
|
16 | 12 | #include <util/invariant.h>
|
| 13 | +#include "java_class_loader_limit.h" |
17 | 14 |
|
18 |
| -void jar_filet::open( |
19 |
| - java_class_loader_limitt &class_loader_limit, |
20 |
| - const std::string &filename) |
| 15 | +jar_filet::jar_filet( |
| 16 | + java_class_loader_limitt &limit, |
| 17 | + const std::string &filename): |
| 18 | + m_zip_archive(filename) |
21 | 19 | {
|
22 |
| - if(!mz_ok) |
| 20 | + const size_t file_count=m_zip_archive.get_num_files(); |
| 21 | + for(size_t index=0; index<file_count; index++) |
23 | 22 | {
|
24 |
| - memset(&zip, 0, sizeof(zip)); |
25 |
| - mz_bool mz_open=mz_zip_reader_init_file(&zip, filename.c_str(), 0); |
26 |
| - mz_ok=mz_open==MZ_TRUE; |
| 23 | + const auto filename=m_zip_archive.get_filename(index); |
| 24 | + if(!has_suffix(filename, ".class") || limit.load_class_file(filename)) |
| 25 | + m_name_to_index.emplace(filename, index); |
27 | 26 | }
|
| 27 | +} |
28 | 28 |
|
29 |
| - if(mz_ok) |
30 |
| - { |
31 |
| - std::size_t number_of_files= |
32 |
| - mz_zip_reader_get_num_files(&zip); |
33 |
| - |
34 |
| - for(std::size_t i=0; i<number_of_files; i++) |
35 |
| - { |
36 |
| - // get the length of the filename, including the trailing \0 |
37 |
| - mz_uint filename_length=mz_zip_reader_get_filename(&zip, i, nullptr, 0); |
38 |
| - std::vector<char> filename_char(filename_length+1); |
39 |
| - INVARIANT(filename_length>=1, "buffer size must include trailing \\0"); |
| 29 | +// VS: No default move constructors or assigns |
40 | 30 |
|
41 |
| - // read and convert to std::string |
42 |
| - mz_uint filename_len= |
43 |
| - mz_zip_reader_get_filename( |
44 |
| - &zip, i, filename_char.data(), filename_length); |
45 |
| - INVARIANT( |
46 |
| - filename_length==filename_len, |
47 |
| - "buffer size was incorrectly pre-computed"); |
48 |
| - std::string file_name(filename_char.data()); |
49 |
| -#if DEBUG |
50 |
| - debug() |
51 |
| - << "jar_filet.open: idx " << i |
52 |
| - << " len " << filename_len |
53 |
| - << " filename '" << std::string(filename_char.data()) << "'" << eom; |
54 |
| -#endif |
55 |
| - INVARIANT(file_name.size()==filename_len-1, "no \\0 found in file name"); |
| 31 | +jar_filet::jar_filet(jar_filet &&other): |
| 32 | + m_zip_archive(std::move(other.m_zip_archive)), |
| 33 | + m_name_to_index((other.m_name_to_index)) {} |
56 | 34 |
|
57 |
| - // non-class files are loaded in any case |
58 |
| - bool add_file=!has_suffix(file_name, ".class"); |
59 |
| - // load .class file only if they match regex / are in match set |
60 |
| - add_file|=class_loader_limit.load_class_file(file_name); |
61 |
| - if(add_file) |
62 |
| - { |
63 |
| - if(has_suffix(file_name, ".class")) |
64 |
| - status() << "read class file " << file_name |
65 |
| - << " from " << filename << eom; |
66 |
| - filtered_jar[file_name]=i; |
67 |
| - } |
68 |
| - } |
69 |
| - } |
| 35 | +jar_filet &jar_filet::operator=(jar_filet &&other) |
| 36 | +{ |
| 37 | + m_zip_archive=std::move(other.m_zip_archive); |
| 38 | + m_name_to_index=std::move(other.m_name_to_index); |
| 39 | + return *this; |
70 | 40 | }
|
71 | 41 |
|
72 |
| -jar_filet::~jar_filet() |
| 42 | +std::string jar_filet::get_entry(const std::string &name) |
73 | 43 | {
|
74 |
| - if(mz_ok) |
| 44 | + const auto entry=m_name_to_index.find(name); |
| 45 | + INVARIANT(entry!=m_name_to_index.end(), "File doesn't exist"); |
| 46 | + try |
| 47 | + { |
| 48 | + return m_zip_archive.extract(entry->second); |
| 49 | + } |
| 50 | + catch(const std::runtime_error &) |
75 | 51 | {
|
76 |
| - mz_zip_reader_end(&zip); |
77 |
| - mz_ok=false; |
| 52 | + return ""; |
78 | 53 | }
|
79 | 54 | }
|
80 | 55 |
|
81 |
| -std::string jar_filet::get_entry(const irep_idt &name) |
/// Predicate wrapper around std::isspace that is safe for any char value.
/// std::isspace has undefined behaviour when its argument is neither EOF
/// nor representable as unsigned char, which happens for negative char
/// values (non-ASCII bytes where char is signed), so the character is
/// converted to unsigned char first.
/// \param ch: character to classify
/// \return true iff \p ch is whitespace according to std::isspace
static bool is_space(const char ch)
{
  return std::isspace(static_cast<unsigned char>(ch))!=0;
}
|
109 | 60 |
|
110 |
| -jar_filet::manifestt jar_filet::get_manifest() |
| 61 | +/// Remove leading and trailing whitespace characters from string |
| 62 | +static std::string trim( |
| 63 | + const std::string::const_iterator begin, |
| 64 | + const std::string::const_iterator end) |
111 | 65 | {
|
112 |
| - auto entry=filtered_jar.find("META-INF/MANIFEST.MF"); |
113 |
| - if(entry==filtered_jar.end()) |
114 |
| - return manifestt(); |
115 |
| - |
116 |
| - std::string dest=get_entry(entry->first); |
117 |
| - std::istringstream in(dest); |
118 |
| - |
119 |
| - manifestt manifest; |
| 66 | + const auto out_begin=std::find_if_not(begin, end, is_space); |
| 67 | + const auto out_end=std::find_if_not( |
| 68 | + std::string::const_reverse_iterator(end), |
| 69 | + std::string::const_reverse_iterator(out_begin), |
| 70 | + is_space).base(); |
| 71 | + return { out_begin, out_end }; |
| 72 | +} |
120 | 73 |
|
121 |
| - std::string line; |
122 |
| - while(std::getline(in, line)) |
| 74 | +std::unordered_map<std::string, std::string> jar_filet::get_manifest() |
| 75 | +{ |
| 76 | + std::unordered_map<std::string, std::string> out; |
| 77 | + const auto entry=m_name_to_index.find("META-INF/MANIFEST.MF"); |
| 78 | + if(entry!=m_name_to_index.end()) |
123 | 79 | {
|
124 |
| - std::size_t pos=line.find(':'); |
125 |
| - if(pos==std::string::npos) |
126 |
| - continue; |
127 |
| - std::string key=line.substr(0, pos); |
128 |
| - |
129 |
| - // skip spaces |
130 |
| - pos++; |
131 |
| - while(pos<line.size() && line[pos]==' ') pos++; |
132 |
| - |
133 |
| - std::string value=line.substr(pos, std::string::npos); |
134 |
| - |
135 |
| - // trim off \r |
136 |
| - if(!value.empty() && *value.rbegin()=='\r') |
137 |
| - value.resize(value.size()-1); |
138 |
| - |
139 |
| - // store |
140 |
| - manifest[key]=value; |
| 80 | + std::istringstream in(this->get_entry(entry->first)); |
| 81 | + std::string line; |
| 82 | + while(std::getline(in, line)) |
| 83 | + { |
| 84 | + const auto key_end=std::find(line.cbegin(), line.cend(), ':'); |
| 85 | + if(key_end!=line.cend()) |
| 86 | + out.emplace( |
| 87 | + trim(line.cbegin(), key_end), |
| 88 | + trim(std::next(key_end), line.cend())); |
| 89 | + } |
141 | 90 | }
|
| 91 | + return out; |
| 92 | +} |
142 | 93 |
|
143 |
| - return manifest; |
| 94 | +std::vector<std::string> jar_filet::filenames() const |
| 95 | +{ |
| 96 | + std::vector<std::string> out; |
| 97 | + for(const auto &pair : m_name_to_index) |
| 98 | + out.emplace_back(pair.first); |
| 99 | + return out; |
144 | 100 | }
|
0 commit comments