Skip to content

Commit 4ef1fe4

Browse files
authored
Merge pull request #14918 from github/tausbn/python-support-tarslip-extraction-filters
Python: Add support for extraction filters
2 parents 30e62d3 + 6e27918 commit 4ef1fe4

File tree

4 files changed

+103
-5
lines changed

4 files changed

+103
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
- Added support for tarfile extraction filters as defined in [PEP-706](https://peps.python.org/pep-0706). In particular, calls to `TarFile.extract` and `TarFile.extractall` are no longer considered to be sinks for the `py/tarslip` query if a sufficiently safe filter is provided.

python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,38 @@ module TarSlip {
5555
ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" }
5656
}
5757

58+
/**
59+
* Holds if `call` has an unsafe extraction filter, either by default (as the default is unsafe),
60+
* or by being explicitly set to an unsafe value such as `"fully_trusted"` or `None`.
61+
*/
62+
private predicate hasUnsafeFilter(API::CallNode call) {
63+
call =
64+
API::moduleImport("tarfile")
65+
.getMember("open")
66+
.getReturn()
67+
.getMember(["extract", "extractall"])
68+
.getACall() and
69+
(
70+
exists(Expr filterValue |
71+
filterValue = call.getParameter(4, "filter").getAValueReachingSink().asExpr() and
72+
(
73+
filterValue.(StrConst).getText() = "fully_trusted"
74+
or
75+
filterValue instanceof None
76+
)
77+
)
78+
or
79+
not exists(call.getParameter(4, "filter"))
80+
)
81+
}
82+
5883
/**
5984
* A sink capturing method calls to `extractall`.
6085
*
61-
* For a call to `file.extractall` without arguments, `file` is considered a sink.
86+
* For a call to `file.extractall`, `file` is considered a sink if
87+
*
88+
* - there are no other arguments, or
89+
* - there are other arguments (but no `members` keyword argument), and the extraction filter is unsafe.
6290
*/
6391
class ExtractAllSink extends Sink {
6492
ExtractAllSink() {
@@ -69,8 +97,13 @@ module TarSlip {
6997
.getReturn()
7098
.getMember("extractall")
7199
.getACall() and
72-
not exists(call.getArg(_)) and
73-
not exists(call.getArgByName(_)) and
100+
(
101+
not exists(call.getArg(_)) and
102+
not exists(call.getArgByName(_))
103+
or
104+
hasUnsafeFilter(call)
105+
) and
106+
not exists(call.getArgByName("members")) and
74107
this = call.(DataFlow::MethodCallNode).getObject()
75108
)
76109
}
@@ -84,7 +117,8 @@ module TarSlip {
84117
exists(DataFlow::CallCfgNode call |
85118
call =
86119
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
87-
this = call.getArg(0)
120+
this = call.getArg(0) and
121+
hasUnsafeFilter(call)
88122
)
89123
}
90124
}
@@ -99,7 +133,8 @@ module TarSlip {
99133
.getReturn()
100134
.getMember("extractall")
101135
.getACall() and
102-
this in [call.getArg(0), call.getArgByName("members")]
136+
this in [call.getArg(0), call.getArgByName("members")] and
137+
hasUnsafeFilter(call)
103138
)
104139
}
105140
}

python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ edges
1212
| tarslip.py:58:1:58:3 | GSSA Variable tar | tarslip.py:59:5:59:9 | GSSA Variable entry |
1313
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:58:1:58:3 | GSSA Variable tar |
1414
| tarslip.py:59:5:59:9 | GSSA Variable entry | tarslip.py:61:21:61:25 | ControlFlowNode for entry |
15+
| tarslip.py:90:1:90:3 | GSSA Variable tar | tarslip.py:91:1:91:3 | ControlFlowNode for tar |
16+
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:90:1:90:3 | GSSA Variable tar |
17+
| tarslip.py:94:1:94:3 | GSSA Variable tar | tarslip.py:95:5:95:9 | GSSA Variable entry |
18+
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:94:1:94:3 | GSSA Variable tar |
19+
| tarslip.py:95:5:95:9 | GSSA Variable entry | tarslip.py:96:17:96:21 | ControlFlowNode for entry |
20+
| tarslip.py:109:1:109:3 | GSSA Variable tar | tarslip.py:110:1:110:3 | ControlFlowNode for tar |
21+
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:109:1:109:3 | GSSA Variable tar |
22+
| tarslip.py:112:1:112:3 | GSSA Variable tar | tarslip.py:113:24:113:26 | ControlFlowNode for tar |
23+
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:112:1:112:3 | GSSA Variable tar |
1524
nodes
1625
| tarslip.py:14:1:14:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
1726
| tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
@@ -31,10 +40,27 @@ nodes
3140
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
3241
| tarslip.py:59:5:59:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
3342
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
43+
| tarslip.py:90:1:90:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
44+
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
45+
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
46+
| tarslip.py:94:1:94:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
47+
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
48+
| tarslip.py:95:5:95:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
49+
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
50+
| tarslip.py:109:1:109:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
51+
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
52+
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
53+
| tarslip.py:112:1:112:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
54+
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
55+
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
3456
subpaths
3557
#select
3658
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
3759
| tarslip.py:20:17:20:21 | ControlFlowNode for entry | tarslip.py:18:7:18:39 | ControlFlowNode for Attribute() | tarslip.py:20:17:20:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:18:7:18:39 | ControlFlowNode for Attribute() | potentially untrusted source |
3860
| tarslip.py:39:17:39:21 | ControlFlowNode for entry | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | tarslip.py:39:17:39:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | potentially untrusted source |
3961
| tarslip.py:43:24:43:26 | ControlFlowNode for tar | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | tarslip.py:43:24:43:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | potentially untrusted source |
4062
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:61:21:61:25 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | potentially untrusted source |
63+
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:91:1:91:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | potentially untrusted source |
64+
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
65+
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
66+
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |

python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,35 @@ def safemembers(members):
8282
for entry in tar:
8383
if not os.path.isabs(entry.name):
8484
tar.extract(entry, "/tmp/unpack/")
85+
86+
# Extraction filters
87+
88+
extraction_filter = "fully_trusted"
89+
90+
tar = tarfile.open(unsafe_filename_tar)
91+
tar.extractall(filter=extraction_filter) # unsafe
92+
tar.close()
93+
94+
tar = tarfile.open(unsafe_filename_tar)
95+
for entry in tar:
96+
tar.extract(entry, filter=extraction_filter) # unsafe
97+
98+
extraction_filter = "data"
99+
100+
tar = tarfile.open(unsafe_filename_tar)
101+
tar.extractall(filter=extraction_filter) # safe
102+
tar.close()
103+
104+
tar = tarfile.open(unsafe_filename_tar)
105+
for entry in tar:
106+
tar.extract(entry, filter=extraction_filter) # safe
107+
108+
extraction_filter = None
109+
tar = tarfile.open(unsafe_filename_tar)
110+
tar.extractall(filter=extraction_filter) # unsafe
111+
112+
tar = tarfile.open(unsafe_filename_tar)
113+
tar.extractall(members=tar, filter=extraction_filter) # unsafe
114+
115+
tar = tarfile.open(unsafe_filename_tar)
116+
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter

0 commit comments

Comments
 (0)