1
1
use crate :: error:: Result ;
2
2
use crate :: storage:: { compression:: CompressionAlgorithm , FileRange } ;
3
3
use anyhow:: { bail, Context as _} ;
4
- use serde:: { Deserialize , Serialize } ;
4
+ use memmap:: MmapOptions ;
5
+ use serde:: de:: DeserializeSeed ;
6
+ use serde:: de:: { IgnoredAny , MapAccess , Visitor } ;
7
+ use serde:: { Deserialize , Deserializer , Serialize } ;
5
8
use std:: collections:: HashMap ;
6
- use std:: io;
7
- use std:: path:: { Path , PathBuf } ;
9
+ use std:: fmt;
10
+ use std:: path:: Path ;
11
+ use std:: { fs, io} ;
8
12
9
13
#[ derive( Deserialize , Serialize ) ]
10
14
pub ( crate ) struct FileInfo {
@@ -21,51 +25,155 @@ impl FileInfo {
21
25
}
22
26
}
23
27
24
- #[ derive( Deserialize , Serialize ) ]
25
- pub ( crate ) struct Index {
26
- files : HashMap < PathBuf , FileInfo > ,
28
+ #[ derive( Serialize ) ]
29
+ struct Index {
30
+ files : HashMap < String , FileInfo > ,
27
31
}
28
32
29
- impl Index {
30
- pub ( crate ) fn load ( reader : impl io:: Read ) -> Result < Index > {
31
- serde_cbor:: from_reader ( reader) . context ( "deserialization error" )
33
+ pub ( crate ) fn create < R : io:: Read + io:: Seek , W : io:: Write > (
34
+ zipfile : & mut R ,
35
+ writer : & mut W ,
36
+ ) -> Result < ( ) > {
37
+ let mut archive = zip:: ZipArchive :: new ( zipfile) ?;
38
+
39
+ // get file locations
40
+ let mut files: HashMap < String , FileInfo > = HashMap :: with_capacity ( archive. len ( ) ) ;
41
+ for i in 0 ..archive. len ( ) {
42
+ let zf = archive. by_index ( i) ?;
43
+
44
+ files. insert (
45
+ zf. name ( ) . to_string ( ) ,
46
+ FileInfo {
47
+ range : FileRange :: new ( zf. data_start ( ) , zf. data_start ( ) + zf. compressed_size ( ) - 1 ) ,
48
+ compression : match zf. compression ( ) {
49
+ zip:: CompressionMethod :: Bzip2 => CompressionAlgorithm :: Bzip2 ,
50
+ c => bail ! ( "unsupported compression algorithm {} in zip-file" , c) ,
51
+ } ,
52
+ } ,
53
+ ) ;
32
54
}
33
55
34
- pub ( crate ) fn save ( & self , writer : impl io:: Write ) -> Result < ( ) > {
35
- serde_cbor:: to_writer ( writer, self ) . context ( "serialization error" )
56
+ serde_cbor:: to_writer ( writer, & Index { files } ) . context ( "serialization error" )
57
+ }
58
+
59
+ pub ( crate ) fn find_in_slice ( bytes : & [ u8 ] , search_for : & str ) -> Result < Option < FileInfo > > {
60
+ let mut deserializer = serde_cbor:: Deserializer :: from_slice ( bytes) ;
61
+
62
+ /// This visitor will just find the `files` element in the top-level map.
63
+ /// Then it will call the `FindFileVisitor` that should find the actual
64
+ /// FileInfo for the path we are searching for.
65
+ struct FindFileListVisitor {
66
+ search_for : String ,
36
67
}
37
68
38
- pub ( crate ) fn new_from_zip < R : io:: Read + io:: Seek > ( zipfile : & mut R ) -> Result < Index > {
39
- let mut archive = zip:: ZipArchive :: new ( zipfile) ?;
40
-
41
- // get file locations
42
- let mut files: HashMap < PathBuf , FileInfo > = HashMap :: with_capacity ( archive. len ( ) ) ;
43
- for i in 0 ..archive. len ( ) {
44
- let zf = archive. by_index ( i) ?;
45
-
46
- files. insert (
47
- PathBuf :: from ( zf. name ( ) ) ,
48
- FileInfo {
49
- range : FileRange :: new (
50
- zf. data_start ( ) ,
51
- zf. data_start ( ) + zf. compressed_size ( ) - 1 ,
52
- ) ,
53
- compression : match zf. compression ( ) {
54
- zip:: CompressionMethod :: Bzip2 => CompressionAlgorithm :: Bzip2 ,
55
- c => bail ! ( "unsupported compression algorithm {} in zip-file" , c) ,
56
- } ,
57
- } ,
58
- ) ;
69
+ impl FindFileListVisitor {
70
+ pub fn new ( path : String ) -> Self {
71
+ FindFileListVisitor { search_for : path }
59
72
}
73
+ }
60
74
61
- Ok ( Index { files } )
75
+ impl < ' de > Visitor < ' de > for FindFileListVisitor {
76
+ type Value = Option < FileInfo > ;
77
+
78
+ fn expecting ( & self , formatter : & mut fmt:: Formatter ) -> fmt:: Result {
79
+ write ! ( formatter, "a map with a 'files' key" )
80
+ }
81
+
82
+ fn visit_map < V > ( self , mut map : V ) -> Result < Self :: Value , V :: Error >
83
+ where
84
+ V : MapAccess < ' de > ,
85
+ {
86
+ /// This visitor will walk the full `files` map and search for
87
+ /// the path we want to have.
88
+ /// Return value is just the `FileInfo` we want to have, or
89
+ /// `None`.
90
+ struct FindFileVisitor {
91
+ search_for : String ,
92
+ }
93
+
94
+ impl FindFileVisitor {
95
+ pub fn new ( search_for : String ) -> Self {
96
+ FindFileVisitor { search_for }
97
+ }
98
+ }
99
+
100
+ impl < ' de > DeserializeSeed < ' de > for FindFileVisitor {
101
+ type Value = Option < FileInfo > ;
102
+ fn deserialize < D > ( self , deserializer : D ) -> Result < Self :: Value , D :: Error >
103
+ where
104
+ D : Deserializer < ' de > ,
105
+ {
106
+ deserializer. deserialize_map ( self )
107
+ }
108
+ }
109
+
110
+ impl < ' de > Visitor < ' de > for FindFileVisitor {
111
+ type Value = Option < FileInfo > ;
112
+ fn expecting ( & self , formatter : & mut fmt:: Formatter ) -> fmt:: Result {
113
+ write ! (
114
+ formatter,
115
+ "a map with path => FileInfo, searching for path {:?}" ,
116
+ self . search_for
117
+ )
118
+ }
119
+ fn visit_map < V > ( self , mut map : V ) -> Result < Self :: Value , V :: Error >
120
+ where
121
+ V : MapAccess < ' de > ,
122
+ {
123
+ while let Some ( key) = map. next_key :: < & str > ( ) ? {
124
+ if key == self . search_for {
125
+ let value = map. next_value :: < FileInfo > ( ) ?;
126
+ // skip over the rest of the data without really parsing it.
127
+ // If we don't do this the serde_cbor deserializer fails because not
128
+ // the whole map is consumed.
129
+ while map. next_entry :: < IgnoredAny , IgnoredAny > ( ) ?. is_some ( ) { }
130
+ return Ok ( Some ( value) ) ;
131
+ } else {
132
+ // skip parsing the FileInfo structure when the key doesn't match.
133
+ map. next_value :: < IgnoredAny > ( ) ?;
134
+ }
135
+ }
136
+
137
+ Ok ( None )
138
+ }
139
+ }
140
+
141
+ while let Some ( key) = map. next_key :: < & str > ( ) ? {
142
+ if key == "files" {
143
+ return map. next_value_seed ( FindFileVisitor :: new ( self . search_for ) ) ;
144
+ }
145
+ }
146
+
147
+ Ok ( None )
148
+ }
62
149
}
63
150
64
- pub ( crate ) fn find_file < P : AsRef < Path > > ( & self , path : P ) -> Result < & FileInfo > {
65
- self . files
66
- . get ( path. as_ref ( ) )
67
- . ok_or_else ( || super :: PathNotFoundError . into ( ) )
151
+ impl < ' de > DeserializeSeed < ' de > for FindFileListVisitor {
152
+ type Value = Option < FileInfo > ;
153
+
154
+ fn deserialize < D > ( self , deserializer : D ) -> Result < Self :: Value , D :: Error >
155
+ where
156
+ D : Deserializer < ' de > ,
157
+ {
158
+ deserializer. deserialize_map ( self )
159
+ }
68
160
}
161
+
162
+ Ok ( FindFileListVisitor :: new ( search_for. to_string ( ) ) . deserialize ( & mut deserializer) ?)
163
+ }
164
+
165
+ pub ( crate ) fn find_in_file < P : AsRef < Path > > (
166
+ archive_index_path : P ,
167
+ search_for : & str ,
168
+ ) -> Result < Option < FileInfo > > {
169
+ let file = fs:: File :: open ( archive_index_path) . context ( "could not open file" ) ?;
170
+ let mmap = unsafe {
171
+ MmapOptions :: new ( )
172
+ . map ( & file)
173
+ . context ( "could not create memory map" ) ?
174
+ } ;
175
+
176
+ find_in_slice ( & mmap, search_for)
69
177
}
70
178
71
179
#[ cfg( test) ]
@@ -74,14 +182,6 @@ mod tests {
74
182
use std:: io:: Write ;
75
183
use zip:: write:: FileOptions ;
76
184
77
- fn validate_index ( index : & Index ) {
78
- assert_eq ! ( index. files. len( ) , 1 ) ;
79
-
80
- let fi = index. files . get ( & PathBuf :: from ( "testfile1" ) ) . unwrap ( ) ;
81
- assert_eq ! ( fi. range, FileRange :: new( 39 , 459 ) ) ;
82
- assert_eq ! ( fi. compression, CompressionAlgorithm :: Bzip2 ) ;
83
- }
84
-
85
185
#[ test]
86
186
fn index_create_save_load ( ) {
87
187
let mut tf = tempfile:: tempfile ( ) . unwrap ( ) ;
@@ -98,13 +198,13 @@ mod tests {
98
198
archive. write_all ( & objectcontent) . unwrap ( ) ;
99
199
tf = archive. finish ( ) . unwrap ( ) ;
100
200
101
- let index = Index :: new_from_zip ( & mut tf) . unwrap ( ) ;
102
- validate_index ( & index) ;
103
-
104
201
let mut buf = Vec :: new ( ) ;
105
- index. save ( & mut buf) . unwrap ( ) ;
202
+ create ( & mut tf, & mut buf) . unwrap ( ) ;
203
+
204
+ let fi = find_in_slice ( & buf, "testfile1" ) . unwrap ( ) . unwrap ( ) ;
205
+ assert_eq ! ( fi. range, FileRange :: new( 39 , 459 ) ) ;
206
+ assert_eq ! ( fi. compression, CompressionAlgorithm :: Bzip2 ) ;
106
207
107
- let new_index = Index :: load ( io:: Cursor :: new ( & buf) ) . unwrap ( ) ;
108
- validate_index ( & new_index) ;
208
+ assert ! ( find_in_slice( & buf, "some_other_file" ) . unwrap( ) . is_none( ) ) ;
109
209
}
110
210
}
0 commit comments