|
15 | 15 | License along with the GNU C Library; if not, see
|
16 | 16 | <https://www.gnu.org/licenses/>. */
|
17 | 17 |
|
18 |
| -#include <stdlib.h> |
| 18 | +#include <atomic.h> |
| 19 | +#include <setenv.h> |
19 | 20 | #include <string.h>
|
20 | 21 | #include <unistd.h>
|
21 | 22 |
|
| 23 | +struct environ_array *__environ_array_list; |
| 24 | +environ_counter __environ_counter; |
| 25 | + |
22 | 26 | char *
|
23 | 27 | getenv (const char *name)
|
24 | 28 | {
|
25 |
| - if (__environ == NULL || name[0] == '\0') |
26 |
| - return NULL; |
27 |
| - |
28 |
| - size_t len = strlen (name); |
29 |
| - for (char **ep = __environ; *ep != NULL; ++ep) |
| 29 | + while (true) |
30 | 30 | {
|
31 |
| - if (name[0] == (*ep)[0] |
32 |
| - && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=') |
33 |
| - return *ep + len + 1; |
34 |
| - } |
| 31 | + /* Used to deal with concurrent unsetenv. */ |
| 32 | + environ_counter start_counter = atomic_load_acquire (&__environ_counter); |
| 33 | + |
| 34 | + /* We use relaxed MO for loading the string pointers because we |
| 35 | + assume the strings themselves are immutable and that loads |
| 36 | + through the string pointers carry a dependency. (This |
| 37 | + depends on the the release MO store to __environ in |
| 38 | + __add_to_environ.) Objects pointed to by pointers stored in |
| 39 | + the __environ array are never modified or deallocated (except |
| 40 | + perhaps if putenv is used, but then synchronization is the |
| 41 | + responsibility of the applications). The backing store for |
| 42 | + __environ is allocated zeroed. In summary, we can assume |
| 43 | + that the pointers we observe are either valid or null, and |
| 44 | + that only initialized string contents is visible. */ |
| 45 | + char **start_environ = atomic_load_relaxed (&__environ); |
| 46 | + if (start_environ == NULL || name[0] == '\0') |
| 47 | + return NULL; |
| 48 | + |
| 49 | + size_t len = strlen (name); |
| 50 | + for (char **ep = start_environ; ; ++ep) |
| 51 | + { |
| 52 | + char *entry = atomic_load_relaxed (ep); |
| 53 | + if (entry == NULL) |
| 54 | + break; |
| 55 | + |
| 56 | + /* If there is a match, return that value. It was valid at |
| 57 | + one point, so we can return it. */ |
| 58 | + if (name[0] == entry[0] |
| 59 | + && strncmp (name, entry, len) == 0 && entry[len] == '=') |
| 60 | + return entry + len + 1; |
| 61 | + } |
| 62 | + |
| 63 | + /* The variable was not found. This might be a false negative |
| 64 | + because unsetenv has shuffled around entries. Check if it is |
| 65 | + necessary to retry. */ |
| 66 | + |
| 67 | + /* See Hans Boehm, Can Seqlocks Get Along with Programming Language |
| 68 | + Memory Models?, Section 4. This is necessary so that loads in |
| 69 | + the loop above are not ordered past the counter check below. */ |
| 70 | + atomic_thread_fence_acquire (); |
| 71 | + |
| 72 | + if (atomic_load_acquire (&__environ_counter) == start_counter) |
| 73 | + /* If we reach this point and there was a concurrent |
| 74 | + unsetenv call which removed the key we tried to find, the |
| 75 | + NULL return value is valid. We can also try again, not |
| 76 | + find the value, and then return NULL (assuming there are |
| 77 | + no further concurrent unsetenv calls). |
| 78 | +
|
| 79 | + However, if getenv is called to find a value that is |
| 80 | + present originally and not removed by any of the |
| 81 | + concurrent unsetenv calls, we must not return NULL here. |
| 82 | +
|
| 83 | + If the counter did not change, there was at most one |
| 84 | + write to the array in unsetenv while the scanning loop |
| 85 | + above was running. This means that there are at most two |
| 86 | + different versions of the array to consider. For the |
| 87 | + sake of argument, we assume that each load can make an |
| 88 | + independent choice which version to use. An arbitrary |
| 89 | + number of unsetenv and setenv calls may have happened |
| 90 | + since start of getenv. Lets write E[0], E[1], ... for |
| 91 | + the original environment elements, a(0) < (1) < ... for a |
| 92 | + sequence of increasing integers that are the indices of |
| 93 | + the environment variables remaining after the removals, and |
| 94 | + N[0], N[1], ... for the new variables added by setenv or |
| 95 | + putenv. Then at the start of the last unsetenv call, the |
| 96 | + environment contains |
| 97 | +
|
| 98 | + E[a(0)], E[a(1)], ..., N[0], N[1], ... |
35 | 99 |
|
36 |
| - return NULL; |
| 100 | + (the N[0], N[1], .... are optional.) Let's assume that |
| 101 | + we are looking for the value E[j]. Then one of the |
| 102 | + a(i) == j (otherwise we may return NULL here because |
| 103 | + of a unsetenv for the value we are looking for). In the |
| 104 | + discussion below it will become clear that the N[k] do |
| 105 | + not actually matter. |
| 106 | +
|
| 107 | + The two versions of array we can choose from differ only |
| 108 | + in one element, say E[a(i)]. There are two cases: |
| 109 | +
|
| 110 | + Case (A): E[a(i)] is an element being removed by unsetenv |
| 111 | + (the target of the first write). We can see the original |
| 112 | + version: |
| 113 | +
|
| 114 | + ..., E[a(i-1)], E[a(i)], E[a(i+1)], ..., N[0], ... |
| 115 | + ------- |
| 116 | + And the overwritten version: |
| 117 | +
|
| 118 | + ..., E[a(i-1)], E[a(i+1)], E[a(i+1)], ..., N[0], ... |
| 119 | + --------- |
| 120 | +
|
| 121 | + (The valueE[a(i+1)] can be the terminating NULL.) |
| 122 | + As discussed, we are not considering the removal of the |
| 123 | + variable being searched for, so a(i) != j, and the |
| 124 | + variable getenv is looking for is available in either |
| 125 | + version, and we would have found it above. |
| 126 | +
|
| 127 | + Case (B): E[a(i)] is an element that has already been |
| 128 | + moved forward and is now itself being overwritten with |
| 129 | + its sucessor value E[a(i+1)]. The two versions of the |
| 130 | + array look like this: |
| 131 | +
|
| 132 | + ..., E[a(i-1)], E[a(i)], E[a(i)], E[a(i+1)], ..., N[0], ... |
| 133 | + ------- |
| 134 | + And with the overwrite in place: |
| 135 | +
|
| 136 | + ..., E[a(i-1)], E[a(i)], E[a(i+1)], E[a(i+1)], ..., N[0], ... |
| 137 | + --------- |
| 138 | +
|
| 139 | + The key observation here is that even in the second |
| 140 | + version with the overwrite present, the scanning loop |
| 141 | + will still encounter the overwritten value E[a(i)] in the |
| 142 | + previous array element. This means that as long as the |
| 143 | + E[j] is still present among the initial E[a(...)] (as we |
| 144 | + assumed because there is no concurrent unsetenv for |
| 145 | + E[j]), we encounter it while scanning here in getenv. |
| 146 | +
|
| 147 | + In summary, if there was at most one write, a negative |
| 148 | + result is a true negative, and we can return NULL. This |
| 149 | + is different from the seqlock paper, which retries if |
| 150 | + there was any write at all. It avoids the need for a |
| 151 | + second, unwritten copy for async-signal-safety. */ |
| 152 | + return NULL; |
| 153 | + /* If there was one more write, retry. This will never happen |
| 154 | + in a signal handler that interrupted unsetenv because the |
| 155 | + suspended unsetenv call cannot change the counter value. */ |
| 156 | + } |
37 | 157 | }
|
38 | 158 | libc_hidden_def (getenv)
|
0 commit comments