Skip to content

Commit 87c6c9e

Browse files
WillAyd authored and proost committed
Clean Up Case Insensitive Comps in Tokenizer (pandas-dev#29534)
1 parent 049bd03 commit 87c6c9e

File tree

3 files changed, with 22 additions and 43 deletions.

pandas/_libs/src/parse_helper.h

-10
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ The full license is in the LICENSE file, distributed with this software.
1111
#define PANDAS__LIBS_SRC_PARSE_HELPER_H_
1212

1313
#include <float.h>
14-
#include "inline_helper.h"
15-
#include "headers/portable.h"
1614
#include "parser/tokenizer.h"
1715

1816
int to_double(char *item, double *p_value, char sci, char decimal,
@@ -94,12 +92,4 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
9492
return -1;
9593
}
9694

97-
PANDAS_INLINE void lowercase(char *p) {
98-
for (; *p; ++p) *p = tolower_ascii(*p);
99-
}
100-
101-
PANDAS_INLINE void uppercase(char *p) {
102-
for (; *p; ++p) *p = toupper_ascii(*p);
103-
}
104-
10595
#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_

pandas/_libs/src/parser/tokenizer.c

+21 -33
Original file line number | Diff line number | Diff line change
@@ -1426,42 +1426,30 @@ int tokenize_all_rows(parser_t *self) {
14261426
return status;
14271427
}
14281428

1429-
PANDAS_INLINE void uppercase(char *p) {
1430-
for (; *p; ++p) *p = toupper_ascii(*p);
1431-
}
1432-
1429+
/*
1430+
* Function: to_boolean
1431+
* --------------------
1432+
*
1433+
* Validate if item should be recognized as a boolean field.
1434+
*
1435+
* item: const char* representing parsed text
1436+
* val : pointer to a uint8_t of boolean representation
1437+
*
1438+
* If item is determined to be boolean, this method will set
1439+
* the appropriate value of val and return 0. A non-zero exit
1440+
* status means that item was not inferred to be boolean, and
1441+
* leaves the value of *val unmodified.
1442+
*/
14331443
int to_boolean(const char *item, uint8_t *val) {
1434-
char *tmp;
1435-
int i, status = 0;
1436-
size_t length0 = (strlen(item) + 1);
1437-
int bufsize = length0;
1438-
1439-
static const char *tstrs[1] = {"TRUE"};
1440-
static const char *fstrs[1] = {"FALSE"};
1441-
1442-
tmp = malloc(bufsize);
1443-
snprintf(tmp, length0, "%s", item);
1444-
uppercase(tmp);
1445-
1446-
for (i = 0; i < 1; ++i) {
1447-
if (strcmp(tmp, tstrs[i]) == 0) {
1448-
*val = 1;
1449-
goto done;
1450-
}
1444+
if (strcasecmp(item, "TRUE") == 0) {
1445+
*val = 1;
1446+
return 0;
1447+
} else if (strcasecmp(item, "FALSE") == 0) {
1448+
*val = 0;
1449+
return 0;
14511450
}
14521451

1453-
for (i = 0; i < 1; ++i) {
1454-
if (strcmp(tmp, fstrs[i]) == 0) {
1455-
*val = 0;
1456-
goto done;
1457-
}
1458-
}
1459-
1460-
status = -1;
1461-
1462-
done:
1463-
free(tmp);
1464-
return status;
1452+
return -1;
14651453
}
14661454

14671455
// ---------------------------------------------------------------------------

pandas/_libs/src/parser/tokenizer.h

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ See LICENSE for the license
2222

2323
#include "../headers/stdint.h"
2424
#include "../inline_helper.h"
25+
#include "../headers/portable.h"
2526

2627
#include "khash.h"
2728

0 commit comments

Comments
 (0)