Skip to content

Commit 54043e8

Browse files
committed
Improved memory consumption during extraction of Zip files
1 parent 0557972 commit 54043e8

File tree

3 files changed

+38
-27
lines changed

3 files changed

+38
-27
lines changed

extract.go

-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
package extract
2828

2929
import (
30-
"bytes"
3130
"context"
3231
"io"
3332
"os"
@@ -70,11 +69,6 @@ func Gz(ctx context.Context, body io.Reader, location string, rename Renamer) er
7069
return extractor.Gz(ctx, body, location, rename)
7170
}
7271

73-
type file struct {
74-
Path string
75-
Mode os.FileMode
76-
Data bytes.Buffer
77-
}
7872
type link struct {
7973
Name string
8074
Path string

extract_test.go

+27
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,33 @@ func TestTarGzMemoryConsumption(t *testing.T) {
382382
require.True(t, heapUsed < 5000000, "heap consumption should be less than 5M but is %d", heapUsed)
383383
}
384384

385+
func TestZipMemoryConsumption(t *testing.T) {
386+
archive := paths.New("testdata/big.zip")
387+
err := download(t, "http://downloads.arduino.cc/tools/gcc-arm-none-eabi-7-2017-q4-major-win32-arduino1.zip", archive)
388+
require.NoError(t, err)
389+
390+
tmpDir, err := paths.MkTempDir("", "")
391+
require.NoError(t, err)
392+
defer tmpDir.RemoveAll()
393+
394+
f, err := archive.Open()
395+
require.NoError(t, err)
396+
397+
var m, m2 runtime.MemStats
398+
runtime.GC()
399+
runtime.ReadMemStats(&m)
400+
401+
err = extract.Zip(context.Background(), f, tmpDir.String(), nil)
402+
require.NoError(t, err)
403+
404+
runtime.ReadMemStats(&m2)
405+
heapUsed := m2.HeapInuse - m.HeapInuse
406+
fmt.Println("Heap memory used during the test:", heapUsed)
407+
// The .zip format requires random access, so the full io.Reader content must be cached. Since
408+
// the test file is 130MB, that is the reason for the 180+ MB of memory consumed.
409+
require.True(t, heapUsed < 200000000, "heap consumption should be less than 200M but is %d", heapUsed)
410+
}
411+
385412
func download(t require.TestingT, url string, file *paths.Path) error {
386413
if file.Exist() {
387414
return nil

extractor.go

+11-21
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,6 @@ func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, re
196196
return errors.Annotatef(err, "Read the zip file")
197197
}
198198

199-
files := []file{}
200199
links := []link{}
201200

202201
// We make the first pass creating the directory structure, or we could end up
@@ -239,35 +238,26 @@ func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, re
239238
}
240239
// We only check for symlinks because hard links aren't possible
241240
case info.Mode()&os.ModeSymlink != 0:
242-
f, err := header.Open()
243-
if err != nil {
241+
if f, err := header.Open(); err != nil {
244242
return errors.Annotatef(err, "Open link %s", path)
245-
}
246-
name, err := ioutil.ReadAll(f)
247-
if err != nil {
243+
} else if name, err := ioutil.ReadAll(f); err != nil {
248244
return errors.Annotatef(err, "Read address of link %s", path)
245+
} else {
246+
links = append(links, link{Path: path, Name: string(name)})
247+
f.Close()
249248
}
250-
links = append(links, link{Path: path, Name: string(name)})
251249
default:
252-
f, err := header.Open()
253-
if err != nil {
250+
if f, err := header.Open(); err != nil {
254251
return errors.Annotatef(err, "Open file %s", path)
252+
} else if err := e.copy(ctx, path, info.Mode(), f); err != nil {
253+
return errors.Annotatef(err, "Create file %s", path)
254+
} else {
255+
f.Close()
255256
}
256-
var data bytes.Buffer
257-
if _, err := copyCancel(ctx, &data, f); err != nil {
258-
return errors.Annotatef(err, "Read contents of file %s", path)
259-
}
260-
files = append(files, file{Path: path, Mode: info.Mode(), Data: data})
261-
}
262-
}
263-
264-
// Now we make another pass creating the files and links
265-
for i := range files {
266-
if err := e.copy(ctx, files[i].Path, files[i].Mode, &files[i].Data); err != nil {
267-
return errors.Annotatef(err, "Create file %s", files[i].Path)
268257
}
269258
}
270259

260+
// Now we make another pass creating the links
271261
for i := range links {
272262
select {
273263
case <-ctx.Done():

0 commit comments

Comments
 (0)