Skip to content

Commit c5926fb

Browse files
authored
Merge pull request #4349 from Cyan4973/devfd
Support process substitution for `--filelist=`
2 parents 7f0519d + 2f96278 commit c5926fb

File tree

8 files changed

+256
-72
lines changed

8 files changed

+256
-72
lines changed

.github/workflows/dev-long-tests.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ jobs:
2828
steps:
2929
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
3030
- name: make test
31-
run: make test
31+
run: |
32+
make test
33+
make -j zstd
34+
./tests/test_process_substitution.bash ./zstd
3235
3336
# lasts ~26mn
3437
make-test-macos:

programs/fileio.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,14 +538,16 @@ static int FIO_removeFile(const char* path)
538538
}
539539

540540
/** FIO_openSrcFile() :
541-
* condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
541+
* condition : `srcFileName` must be non-NULL.
542+
* optional: `prefs` may be NULL.
542543
* @result : FILE* to `srcFileName`, or NULL if it fails */
543544
static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
544545
{
545546
int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
546547
assert(srcFileName != NULL);
547548
assert(statbuf != NULL);
548-
if (!strcmp (srcFileName, stdinmark)) {
549+
550+
if (!strcmp(srcFileName, stdinmark)) {
549551
DISPLAYLEVEL(4,"Using stdin for input \n");
550552
SET_BINARY_MODE(stdin);
551553
return stdin;
@@ -557,8 +559,10 @@ static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFile
557559
return NULL;
558560
}
559561

562+
/* Accept regular files, FIFOs, and process substitution file descriptors */
560563
if (!UTIL_isRegularFileStat(statbuf)
561564
&& !UTIL_isFIFOStat(statbuf)
565+
&& !UTIL_isFileDescriptorPipe(srcFileName) /* Process substitution support */
562566
&& !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
563567
) {
564568
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
@@ -655,7 +659,11 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
655659
}
656660
#endif
657661
if (f == NULL) {
658-
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
662+
if (UTIL_isFileDescriptorPipe(dstFileName)) {
663+
DISPLAYLEVEL(1, "zstd: error: no output specified (use -o or -c). \n");
664+
} else {
665+
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
666+
}
659667
} else {
660668
/* An increased buffer size can provide a significant performance
661669
* boost on some platforms. Note that providing a NULL buf with a

programs/util.c

Lines changed: 142 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg,
151151
/*-*************************************
152152
* Constants
153153
***************************************/
154-
#define LIST_SIZE_INCREASE (8*1024)
154+
/* Size suffix : `8 KB` expands to `8 * (1 << 10)` */
#define KB * (1 << 10)
#define LIST_SIZE_INCREASE   (8 KB)
/* Upper bound (50 MB) on the content of a file listing file names.
 * Fully parenthesized, so the macro remains a single operand
 * when combined with `/`, `%`, a cast, or any other tighter-binding operator. */
#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)
156157

157158

@@ -448,6 +449,26 @@ int UTIL_isFIFOStat(const stat_t* statbuf)
448449
return 0;
449450
}
450451

452+
/* UTIL_isFileDescriptorPipe() :
 * Path-based detection of file-descriptor paths, as used for process substitution.
 * Only the textual prefix of @filename is inspected; nothing is opened nor stat'ed.
 * condition : @filename must be non-NULL.
 * @return : 1 if @filename starts with "/dev/fd/" or "/proc/self/fd/", 0 otherwise */
int UTIL_isFileDescriptorPipe(const char* filename)
{
    static const char devFdPrefix[]  = "/dev/fd/";
    static const char procFdPrefix[] = "/proc/self/fd/";
    int isFdPath;

    UTIL_TRACE_CALL("UTIL_isFileDescriptorPipe(%s)", filename);

    /* sizeof()-1 : compare the prefix only, without its terminating '\0' */
    isFdPath = (strncmp(filename, devFdPrefix,  sizeof(devFdPrefix)  - 1) == 0)
            || (strncmp(filename, procFdPrefix, sizeof(procFdPrefix) - 1) == 0);

    UTIL_TRACE_RET(isFdPath);
    return isFdPath;
}
471+
451472
/* UTIL_isBlockDevStat : distinguish named pipes */
452473
int UTIL_isBlockDevStat(const stat_t* statbuf)
453474
{
@@ -614,101 +635,157 @@ U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles)
614635
}
615636

616637

617-
/* condition : @file must be valid, and not have reached its end.
618-
* @return : length of line written into @buf, ended with `\0` instead of '\n',
619-
* or 0, if there is no new line */
620-
static size_t readLineFromFile(char* buf, size_t len, FILE* file)
638+
/* Read the entire content of a file into a buffer with progressive resizing */
639+
static char* UTIL_readFileContent(FILE* inFile, size_t* totalReadPtr)
621640
{
622-
assert(!feof(file));
623-
if ( fgets(buf, (int) len, file) == NULL ) return 0;
624-
{ size_t linelen = strlen(buf);
625-
if (strlen(buf)==0) return 0;
626-
if (buf[linelen-1] == '\n') linelen--;
627-
buf[linelen] = '\0';
628-
return linelen+1;
641+
size_t bufSize = 64 KB; /* Start with a reasonable buffer size */
642+
size_t totalRead = 0;
643+
size_t bytesRead = 0;
644+
char* buf = (char*)malloc(bufSize);
645+
if (buf == NULL) return NULL;
646+
647+
648+
/* Read the file incrementally */
649+
while ((bytesRead = fread(buf + totalRead, 1, bufSize - totalRead - 1, inFile)) > 0) {
650+
totalRead += bytesRead;
651+
652+
/* If buffer is nearly full, expand it */
653+
if (bufSize - totalRead < 1 KB) {
654+
if (bufSize >= MAX_FILE_OF_FILE_NAMES_SIZE) {
655+
/* Too large, abort */
656+
free(buf);
657+
return NULL;
658+
}
659+
660+
{ size_t newBufSize = bufSize * 2;
661+
if (newBufSize > MAX_FILE_OF_FILE_NAMES_SIZE)
662+
newBufSize = MAX_FILE_OF_FILE_NAMES_SIZE;
663+
664+
{ char* newBuf = (char*)realloc(buf, newBufSize);
665+
if (newBuf == NULL) {
666+
free(buf);
667+
return NULL;
668+
}
669+
670+
buf = newBuf;
671+
bufSize = newBufSize;
672+
} } }
629673
}
674+
675+
/* Add null terminator to the end */
676+
buf[totalRead] = '\0';
677+
*totalReadPtr = totalRead;
678+
679+
return buf;
630680
}
631681

632-
/* Conditions :
633-
* size of @inputFileName file must be < @dstCapacity
634-
* @dst must be initialized
635-
* @return : nb of lines
636-
* or -1 if there's an error
637-
*/
638-
static int
639-
readLinesFromFile(void* dst, size_t dstCapacity,
640-
const char* inputFileName)
682+
/* UTIL_processLines() :
 * Split @buffer in place into consecutive C strings :
 * every '\n' within the first @bufferSize bytes is overwritten with '\0'.
 * @return : nb of lines found. A final line not terminated by '\n' counts as a line.
 *           0 when @bufferSize is 0. */
static size_t UTIL_processLines(char* buffer, size_t bufferSize)
{
    size_t nbLines = 0;
    size_t pos;

    if (bufferSize == 0) return 0;

    for (pos = 0; pos < bufferSize; pos++) {
        if (buffer[pos] != '\n') continue;
        buffer[pos] = '\0';   /* end of line becomes end of string */
        nbLines++;
    }

    /* Last byte is not a terminator : there is one more, unterminated, line */
    if (buffer[bufferSize - 1] != '\0') nbLines++;

    return nbLines;
}
704+
705+
/* UTIL_createLinePointers() :
 * Build a table of pointers to the '\0'-separated strings stored in @buffer.
 * @buffer : @bufferSize bytes containing consecutive strings, each ended by '\0'
 *           (the last one may instead end at @bufferSize).
 * @numLines : nb of strings expected within @buffer.
 * @return : a malloc'ed table of @numLines pointers into @buffer, to be free'd by the caller.
 *           NULL on allocation failure, or if fewer than @numLines strings are found.
 * Note : the table references @buffer, it does not copy it. */
static const char** UTIL_createLinePointers(char* buffer, size_t numLines, size_t bufferSize)
{
    size_t lineIndex = 0;
    size_t pos = 0;
    const char** linePointers;

    /* protect the allocation size from wrapping around */
    if (numLines > ((size_t)-1) / sizeof(*linePointers)) return NULL;

    /* element type is `const char*` : size it from the table itself */
    linePointers = (const char**)malloc(numLines * sizeof(*linePointers));
    if (linePointers == NULL) return NULL;

    while (lineIndex < numLines && pos < bufferSize) {
        size_t len = 0;
        linePointers[lineIndex++] = buffer + pos;

        /* Find the end of the current string, without reading past the buffer */
        while ((pos + len < bufferSize) && buffer[pos + len] != '\0') {
            len++;
        }

        /* Move past this string, and past its '\0' when there is one */
        pos += len;
        if (pos < bufferSize) pos++;
    }

    /* Fewer strings than announced : the table would be partially uninitialized */
    if (lineIndex != numLines) {
        free((void*)linePointers);
        return NULL;
    }

    return linePointers;
}
666737

667-
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
668738
FileNamesTable*
669-
UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
739+
UTIL_createFileNamesTable_fromFileList(const char* fileList)
670740
{
671-
size_t nbFiles = 0;
672-
char* buf;
673-
size_t bufSize;
674741
stat_t statbuf;
742+
char* buffer = NULL;
743+
size_t numLines = 0;
744+
size_t bufferSize = 0;
675745

676-
if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf))
746+
/* Check if the input is a valid file */
747+
if (!UTIL_stat(fileList, &statbuf)) {
677748
return NULL;
678-
679-
{ U64 const inputFileSize = UTIL_getFileSizeStat(&statbuf);
680-
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
681-
return NULL;
682-
bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */
683749
}
684750

685-
buf = (char*) malloc(bufSize);
686-
CONTROL( buf != NULL );
751+
/* Check if the input is a supported type */
752+
if (!UTIL_isRegularFileStat(&statbuf) &&
753+
!UTIL_isFIFOStat(&statbuf) &&
754+
!UTIL_isFileDescriptorPipe(fileList)) {
755+
return NULL;
756+
}
687757

688-
{ int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName);
758+
/* Open the input file */
759+
{ FILE* const inFile = fopen(fileList, "rb");
760+
if (inFile == NULL) return NULL;
689761

690-
if (ret_nbFiles <= 0) {
691-
free(buf);
692-
return NULL;
693-
}
694-
nbFiles = (size_t)ret_nbFiles;
762+
/* Read the file content */
763+
buffer = UTIL_readFileContent(inFile, &bufferSize);
764+
fclose(inFile);
695765
}
696766

697-
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
698-
CONTROL(filenamesTable != NULL);
767+
if (buffer == NULL) return NULL;
699768

700-
{ size_t fnb, pos = 0;
701-
for (fnb = 0; fnb < nbFiles; fnb++) {
702-
filenamesTable[fnb] = buf+pos;
703-
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
704-
}
705-
assert(pos <= bufSize);
769+
/* Process lines */
770+
numLines = UTIL_processLines(buffer, bufferSize);
771+
if (numLines == 0) {
772+
free(buffer);
773+
return NULL;
774+
}
775+
776+
/* Create line pointers */
777+
{ const char** linePointers = UTIL_createLinePointers(buffer, numLines, bufferSize);
778+
if (linePointers == NULL) {
779+
free(buffer);
780+
return NULL;
706781
}
707782

708-
return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf);
783+
/* Create the final table */
784+
return UTIL_assembleFileNamesTable(linePointers, numLines, buffer);
709785
}
710786
}
711787

788+
712789
static FileNamesTable*
713790
UTIL_assembleFileNamesTable2(const char** filenames, size_t tableSize, size_t tableCapacity, char* buf)
714791
{

programs/util.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ int UTIL_isSameFileStat(const char* file1, const char* file2, const stat_t* file
191191
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
192192
int UTIL_isLink(const char* infilename);
193193
int UTIL_isFIFO(const char* infilename);
194+
int UTIL_isFileDescriptorPipe(const char* filename);
194195

195196
/**
196197
* Returns with the given file descriptor is a console.
@@ -250,13 +251,13 @@ typedef struct
250251
size_t tableCapacity;
251252
} FileNamesTable;
252253

253-
/*! UTIL_createFileNamesTable_fromFileName() :
254+
/*! UTIL_createFileNamesTable_fromFileList() :
254255
* read filenames from @inputFileName, and store them into returned object.
255256
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
256257
* Note: inputFileSize must be less than 50MB
257258
*/
258259
FileNamesTable*
259-
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
260+
UTIL_createFileNamesTable_fromFileList(const char* inputFileName);
260261

261262
/*! UTIL_assembleFileNamesTable() :
262263
* This function takes ownership of its arguments, @filenames and @buf,

programs/zstdcli.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1379,7 +1379,7 @@ int main(int argCount, const char* argv[])
13791379
size_t const nbFileLists = file_of_names->tableSize;
13801380
size_t flNb;
13811381
for (flNb=0; flNb < nbFileLists; flNb++) {
1382-
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
1382+
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileList(file_of_names->fileNames[flNb]);
13831383
if (fnt==NULL) {
13841384
DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
13851385
CLEAN_RETURN(1);

tests/cli-tests/file-stat/compress-file-to-dir-without-write-perm.sh.stderr.exact

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,7 @@ Trace:FileStat: > UTIL_isRegularFile(out/file.zst)
2222
Trace:FileStat: > UTIL_stat(-1, out/file.zst)
2323
Trace:FileStat: < 0
2424
Trace:FileStat: < 0
25+
Trace:FileStat: > UTIL_isFileDescriptorPipe(out/file.zst)
26+
Trace:FileStat: < 0
2527
zstd: out/file.zst: Permission denied
2628
zstd: can't stat out/file.zst : Permission denied -- ignored

tests/playTests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,7 @@ ls tmp* > tmpList
848848
zstd -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list
849849
rm -rf tmp*
850850

851+
851852
println "\n===> --[no-]content-size tests"
852853

853854
datagen > tmp_contentsize

0 commit comments

Comments
 (0)