Skip to content

Commit 39dd4db

Browse files
author
继盛
committed
upgrade to 0.8.2
1 parent 4b0de18 commit 39dd4db

File tree

15 files changed

+164
-22
lines changed

15 files changed

+164
-22
lines changed

odps/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
version_info = (0, 8, 1)
15+
version_info = (0, 8, 2)
1616
_num_index = max(idx if isinstance(v, int) else 0
1717
for idx, v in enumerate(version_info))
1818
__version__ = '.'.join(map(str, version_info[:_num_index + 1])) + \

odps/accounts.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ def _do_POST(self):
136136
self.end_headers()
137137
return
138138

139+
self._sign(postvars)
140+
141+
def _sign(self, postvars):
139142
if self.server._token is not None:
140143
auth = self.headers.get('Authorization')
141144
if not auth:
@@ -267,3 +270,16 @@ def sign_request(self, req, endpoint):
267270
LOG.debug('headers after signing: ' + repr(req.headers))
268271
else:
269272
raise SignServerError('Sign server returned error code: %d' % resp.status_code, resp.status_code)
273+
274+
275+
class BearerTokenAccount(BaseAccount):
276+
def __init__(self, token):
277+
self._token = token
278+
279+
@property
280+
def token(self):
281+
return self._token
282+
283+
def sign_request(self, req, endpoint):
284+
req.headers['x-odps-bearer-token'] = self._token
285+
LOG.debug('headers after signing: ' + repr(req.headers))

odps/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
DEFAULT_CHUNK_SIZE = 1496
2727
DEFAULT_CONNECT_RETRY_TIMES = 4
28-
DEFAULT_CONNECT_TIMEOUT = 5
28+
DEFAULT_CONNECT_TIMEOUT = 10
2929
DEFAULT_READ_TIMEOUT = 120
3030
DEFAULT_POOL_CONNECTIONS = 10
3131
DEFAULT_POOL_MAXSIZE = 10
@@ -329,7 +329,7 @@ def validate(x):
329329
options.register_option('allow_antique_date', False)
330330
options.register_option('user_agent_pattern', '$pyodps_version $python_version $os_version')
331331
options.register_option('log_view_host', None)
332-
options.register_option('log_view_hours', 24, validator=is_integer)
332+
options.register_option('log_view_hours', 24 * 30, validator=is_integer)
333333
options.register_option('api_proxy', None)
334334
options.register_option('data_proxy', None)
335335
options.redirect_option('tunnel_proxy', 'data_proxy')

odps/df/backends/odpssql/compiler.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,12 +1398,11 @@ def visit_join(self, expr):
13981398
self._ctx.add_expr_compiled(expr, from_clause)
13991399

14001400
def visit_union(self, expr):
1401-
if expr._distinct:
1402-
raise CompileError("Distinct union is not supported here.")
1403-
1401+
union_type = 'UNION ALL' if not expr._distinct else 'UNION'
14041402
left_compiled, right_compiled = tuple(self._sub_compiles[expr])
14051403

1406-
from_clause = '{0} \nUNION ALL\n{1}'.format(left_compiled, utils.indent(right_compiled, self._indent_size))
1404+
from_clause = '{0} \n{1}\n{2}'.format(left_compiled, union_type,
1405+
utils.indent(right_compiled, self._indent_size))
14071406

14081407
compiled = from_clause
14091408
if not self._union_no_alias.get(expr, False):

odps/df/backends/odpssql/tests/test_compiler.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,6 +2546,20 @@ def testUnion(self):
25462546

25472547
self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
25482548

2549+
expr = e.union(e3['const', 'id', 'name'], distinct=True)
2550+
2551+
expected = "SELECT * \n" \
2552+
"FROM (\n" \
2553+
" SELECT t1.`name`, t1.`id`, 'cst' AS `const` \n" \
2554+
" FROM mocked_project.`pyodps_test_expr_table` t1 \n" \
2555+
" UNION\n" \
2556+
" SELECT t2.`name`, SUM(t2.`id`) AS `id`, 'cst' AS `const` \n" \
2557+
" FROM mocked_project.`pyodps_test_expr_table1` t2 \n" \
2558+
" GROUP BY t2.`name`\n" \
2559+
") t3"
2560+
2561+
self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
2562+
25492563
def testAliases(self):
25502564
df = self.expr
25512565
df = df[(df.id == 1) | (df.id == 2)].exclude(['fid'])

odps/df/backends/odpssql/types.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
# Copyright 1999-2017 Alibaba Group Holding Ltd.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,6 +18,7 @@
1818
from .... import types as odps_types
1919
from ... import types as df_types
2020
from ....compat import six
21+
from ....config import options
2122

2223

2324
_odps_to_df_types = {
@@ -46,6 +47,19 @@
4647
df_types.binary: odps_types.string,
4748
}
4849

50+
_df_to_odps_types2 = {
51+
df_types.int8: odps_types.tinyint,
52+
df_types.int16: odps_types.smallint,
53+
df_types.int32: odps_types.int_,
54+
df_types.int64: odps_types.bigint,
55+
df_types.float32: odps_types.float_,
56+
df_types.float64: odps_types.double,
57+
df_types.boolean: odps_types.boolean,
58+
df_types.string: odps_types.string,
59+
df_types.datetime: odps_types.datetime,
60+
df_types.binary: odps_types.binary,
61+
}
62+
4963

5064
def odps_type_to_df_type(odps_type):
5165
if isinstance(odps_type, six.string_types):
@@ -74,11 +88,15 @@ def odps_schema_to_df_schema(odps_schema):
7488

7589

7690
def df_type_to_odps_type(df_type):
91+
if options.sql.use_odps2_extension:
92+
df_to_odps_types = _df_to_odps_types2
93+
else:
94+
df_to_odps_types = _df_to_odps_types
7795
if isinstance(df_type, six.string_types):
7896
df_type = df_types.validate_data_type(df_type)
7997

80-
if df_type in _df_to_odps_types:
81-
return _df_to_odps_types[df_type]
98+
if df_type in df_to_odps_types:
99+
return df_to_odps_types[df_type]
82100
elif df_type == df_types.decimal:
83101
return odps_types.Decimal()
84102
elif isinstance(df_type, df_types.List):

odps/df/backends/tests/test_mixed_engine.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
import time
1919
import uuid
2020

21-
from odps.tests.core import tn, pandas_case
21+
from odps import types as odps_types
22+
from odps.tests.core import tn, pandas_case, odps2_typed_case
2223
from odps.df.backends.tests.core import TestBase
2324
from odps.config import options
2425
from odps.compat import unittest
2526
from odps.models import Schema, Instance
26-
from odps.errors import ODPSError
2727
from odps.df.backends.engine import MixedEngine
2828
from odps.df.backends.odpssql.engine import ODPSSQLEngine
2929
from odps.df.backends.pd.engine import PandasEngine
@@ -194,6 +194,31 @@ def testPandasPersist(self):
194194
finally:
195195
self.odps.delete_table(tmp_table_name)
196196

197+
@odps2_typed_case
198+
def testPandasPersistODPS2(self):
199+
import pandas as pd
200+
import numpy as np
201+
202+
data_int8 = np.random.randint(0, 10, (1,), dtype=np.int8)
203+
data_int16 = np.random.randint(0, 10, (1,), dtype=np.int16)
204+
data_int32 = np.random.randint(0, 10, (1,), dtype=np.int32)
205+
data_int64 = np.random.randint(0, 10, (1,), dtype=np.int64)
206+
data_float32 = np.random.random((1,)).astype(np.float32)
207+
data_float64 = np.random.random((1,)).astype(np.float64)
208+
209+
df = DataFrame(pd.DataFrame(dict(data_int8=data_int8, data_int16=data_int16,
210+
data_int32=data_int32, data_int64=data_int64,
211+
data_float32=data_float32, data_float64=data_float64)))
212+
tmp_table_name = tn('pyodps_test_mixed_persist_odps2_types')
213+
214+
self.odps.delete_table(tmp_table_name, if_exists=True)
215+
df.persist(tmp_table_name, lifecycle=1, drop_table=True, odps=self.odps)
216+
217+
t = self.odps.get_table(tmp_table_name)
218+
expected_types = [odps_types.tinyint, odps_types.smallint, odps_types.int_,
219+
odps_types.bigint, odps_types.float_, odps_types.double]
220+
self.assertEqual(expected_types, t.schema.types)
221+
197222
def testExecuteCacheTable(self):
198223
df = self.odps_df[self.odps_df.name == 'name1']
199224
result = df.execute().values

odps/models/instance.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,8 @@ def open_reader(self, *args, **kwargs):
841841
if false, use conventional routine.
842842
if absent, `options.tunnel.use_instance_tunnel` will be used and automatic fallback
843843
is enabled.
844+
:param limit: if True, enable the limitation on the amount of data read by the reader
845+
:type limit: bool
844846
:param reopen: the reader will reuse the last one by default; if reopen is True, a new reader is opened.
845847
:type reopen: bool
846848
:param endpoint: the tunnel service URL

odps/models/instances.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _get_submit_instance_content(cls, job):
133133
return Instance.AnonymousSubmitInstance(job=job).serialize()
134134

135135
def create(self, xml=None, job=None, task=None, priority=None, running_cluster=None,
136-
headers=None, create_callback=None):
136+
headers=None, create_callback=None, encoding=None):
137137
if xml is None:
138138
job = self._create_job(job=job, task=task, priority=priority,
139139
running_cluster=running_cluster)
@@ -155,6 +155,8 @@ def create(self, xml=None, job=None, task=None, priority=None, running_cluster=N
155155
if create_callback is not None:
156156
create_callback(instance_id)
157157

158+
if encoding is not None:
159+
resp.encoding = encoding
158160
body = resp.text
159161
if body:
160162
instance_result = Instance.InstanceResult.parse(self._client, resp)

odps/models/table.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,11 @@ def gen_create_table_sql(table_name, table_schema, comment=None, if_not_exists=F
270270
project = utils.to_text(project)
271271
comment = utils.to_text(comment)
272272

273-
store_as = kw.get('store_as')
273+
stored_as = kw.get('stored_as')
274+
external_stored_as = kw.get('external_stored_as')
274275
storage_handler = kw.get('storage_handler')
275276

276-
buf.write(u'CREATE%s TABLE ' % (' EXTERNAL' if storage_handler or store_as else ''))
277+
buf.write(u'CREATE%s TABLE ' % (' EXTERNAL' if storage_handler or external_stored_as else ''))
277278
if if_not_exists:
278279
buf.write(u'IF NOT EXISTS ')
279280
if project is not None:
@@ -319,11 +320,11 @@ def write_columns(col_array):
319320
serde_properties = kw.get('serde_properties')
320321
location = kw.get('location')
321322
resources = kw.get('resources')
322-
if storage_handler or store_as:
323+
if storage_handler or external_stored_as:
323324
if storage_handler:
324325
buf.write("STORED BY '%s'\n" % escape_odps_string(storage_handler))
325326
else:
326-
buf.write("STORED AS %s\n" % escape_odps_string(store_as))
327+
buf.write("STORED AS %s\n" % escape_odps_string(external_stored_as))
327328
if serde_properties:
328329
buf.write('WITH SERDEPROPERTIES (\n')
329330
for idx, k in enumerate(serde_properties):
@@ -344,6 +345,9 @@ def write_columns(col_array):
344345
if hub_lifecycle is not None:
345346
buf.write(u' HUBLIFECYCLE %s\n' % hub_lifecycle)
346347

348+
if stored_as:
349+
buf.write("STORED AS %s\n" % escape_odps_string(stored_as))
350+
347351
return buf.getvalue().strip()
348352

349353
def get_ddl(self, with_comments=True, if_not_exists=False):

odps/models/xflows.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ def get_xflow_sub_instances(self, instance):
180180
six.itervalues(self.get_xflow_results(instance))):
181181
if x_result.node_type == 'Instance':
182182
inst_dict[x_result.name] = self.odps.get_instance(x_result.instance_id)
183+
elif x_result.node_type == 'SubWorkflow':
184+
sub_instance = self.odps.get_instance(x_result.instance_id)
185+
sub_inst_dict = self.odps.get_xflow_sub_instances(sub_instance)
186+
inst_dict.update(**sub_inst_dict)
183187
return inst_dict
184188

185189
def iter_xflow_sub_instances(self, instance, interval=1):

odps/tempobj.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,11 @@ def clean_thread():
401401

402402

403403
def _gen_repository_key(odps):
404-
return hashlib.md5('####'.join([odps.account.access_id, odps.endpoint, odps.project]).encode('utf-8')).hexdigest()
404+
if hasattr(odps.account, 'access_id'):
405+
keys = [odps.account.access_id, odps.endpoint, odps.project]
406+
elif hasattr(odps.account, 'token'):
407+
keys = [odps.account.token, odps.endpoint, odps.project]
408+
return hashlib.md5('####'.join(keys).encode('utf-8')).hexdigest()
405409

406410

407411
def _put_objects(odps, objs):

odps/tests/test_accounts.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
import uuid
1818

1919
from odps import ODPS
20+
from odps import errors
2021
from odps.tests.core import TestBase, tn
21-
from odps.accounts import SignServer, SignServerAccount, SignServerError
22+
from odps.accounts import SignServer, SignServerAccount, SignServerError, BearerTokenAccount
2223

2324

2425
class Test(TestBase):
@@ -54,3 +55,36 @@ def testTokenizedSignServerAccount(self):
5455
t.drop(async_=True)
5556
finally:
5657
server.stop()
58+
59+
def testBearerTokenAccount(self):
60+
self.odps.delete_table(tn('test_bearer_token_account_table'), if_exists=True)
61+
t = self.odps.create_table(tn('test_bearer_token_account_table'), 'col string', lifecycle=1)
62+
with t.open_writer() as writer:
63+
records = [['val1'], ['val2'], ['val3']]
64+
writer.write(records)
65+
66+
inst = self.odps.execute_sql('select count(*) from {0}'.format(tn('test_bearer_token_account_table')), async_=True)
67+
inst.wait_for_success()
68+
task_name = inst.get_task_names()[0]
69+
70+
logview_address = inst.get_logview_address()
71+
token = logview_address[logview_address.find('token=') + len('token='):]
72+
bearer_token_account = BearerTokenAccount(token=token)
73+
bearer_token_odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=bearer_token_account)
74+
bearer_token_instance = bearer_token_odps.get_instance(inst.id)
75+
76+
self.assertEqual(inst.get_task_result(task_name),
77+
bearer_token_instance.get_task_result(task_name))
78+
self.assertEqual(inst.get_task_summary(task_name),
79+
bearer_token_instance.get_task_summary(task_name))
80+
81+
with self.assertRaises(errors.NoPermission):
82+
bearer_token_odps.create_table(tn('test_bearer_token_account_table_test1'),
83+
'col string', lifecycle=1)
84+
85+
fake_token_account = BearerTokenAccount(token='fake-token')
86+
bearer_token_odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=fake_token_account)
87+
88+
with self.assertRaises(errors.ODPSError):
89+
bearer_token_odps.create_table(tn('test_bearer_token_account_table_test2'),
90+
'col string', lifecycle=1)

odps/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def _repr(self):
270270
return buf.getvalue()
271271

272272
def build_snapshot(self):
273-
if self._snapshot is None and not options.force_py:
273+
if not options.force_py:
274274
if not self._columns:
275275
return None
276276

setup.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,35 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
# Parts of this file were taken from the pandas project
17+
# (https://github.com/pandas-dev/pandas), which is permitted for use under
18+
# the BSD 3-Clause License
19+
1620
from setuptools import setup, find_packages, Extension
1721
from setuptools.command.install import install
1822
from distutils.cmd import Command
23+
from distutils.sysconfig import get_config_var
24+
from distutils.version import LooseVersion
1925

2026
import sys
2127
import os
2228
import platform
2329
import shutil
2430

31+
32+
# From https://github.com/pandas-dev/pandas/pull/24274:
33+
# For mac, ensure extensions are built for macos 10.9 when compiling on a
34+
# 10.9 system or above, overriding distutils behaviour which is to target
35+
# the version that python was built for. This may be overridden by setting
36+
# MACOSX_DEPLOYMENT_TARGET before calling setup.py
37+
if sys.platform == 'darwin':
38+
if 'MACOSX_DEPLOYMENT_TARGET' not in os.environ:
39+
current_system = LooseVersion(platform.mac_ver()[0])
40+
python_target = LooseVersion(
41+
get_config_var('MACOSX_DEPLOYMENT_TARGET'))
42+
if python_target < '10.9' and current_system >= '10.9':
43+
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.9'
44+
2545
repo_root = os.path.dirname(os.path.abspath(__file__))
2646

2747
try:

0 commit comments

Comments
 (0)