Skip to content

Commit e77853e

Browse files
authored
Releases v0.12.3 (#281)
1 parent ff54497 commit e77853e

File tree

5 files changed

+79
-15
lines changed

5 files changed

+79
-15
lines changed

odps/_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
version_info = (0, 12, 3, "rc1")
15+
version_info = (0, 12, 3)
1616
_num_index = max(idx if isinstance(v, int) else 0 for idx, v in enumerate(version_info))
1717
__version__ = ".".join(map(str, version_info[: _num_index + 1])) + "".join(
1818
version_info[_num_index + 1 :]

odps/df/backends/pd/types.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,9 @@
2929
pd = None
3030

3131
from ... import types
32-
from ....models import TableSchema
32+
from .... import types as odps_types
3333
from ....compat import six
34+
from ....models import TableSchema
3435

3536
_np_to_df_types = dict()
3637
_df_to_np_types = dict()
@@ -100,7 +101,10 @@ def np_type_to_df_type(dtype, arr=None, unknown_as_string=False, name=None):
100101
raise TypeError('Unknown dtype: %s' % dtype)
101102

102103

103-
def pd_to_df_schema(pd_df, unknown_as_string=False, as_type=None):
104+
def pd_to_df_schema(pd_df, unknown_as_string=False, as_type=None, type_mapping=None):
105+
from ..odpssql.types import odps_type_to_df_type
106+
107+
type_mapping = type_mapping or {}
104108
if pd_df.index.name is not None:
105109
pd_df.reset_index(inplace=True)
106110

@@ -113,6 +117,14 @@ def pd_to_df_schema(pd_df, unknown_as_string=False, as_type=None):
113117
if as_type and names[i] in as_type:
114118
df_types.append(as_type[names[i]])
115119
continue
120+
if names[i] in type_mapping:
121+
try:
122+
odps_type = odps_types.validate_data_type(type_mapping[names[i]])
123+
df_type = odps_type_to_df_type(odps_type)
124+
except:
125+
df_type = types.validate_data_type(type_mapping[names[i]])
126+
df_types.append(df_type)
127+
continue
116128
df_types.append(np_type_to_df_type(dtypes.iloc[i], arr,
117129
unknown_as_string=unknown_as_string,
118130
name=names[i]))

odps/models/tableio.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -841,7 +841,11 @@ def _resolve_schema(
841841
data_schema = arrow_schema_to_odps_schema(records_list.schema)
842842
elif cls._is_pd_df(records_list):
843843
data_schema = df_schema_to_odps_schema(
844-
pd_to_df_schema(records_list, unknown_as_string=unknown_as_string)
844+
pd_to_df_schema(
845+
records_list,
846+
unknown_as_string=unknown_as_string,
847+
type_mapping=type_mapping,
848+
)
845849
)
846850
elif isinstance(records_list, list) and odps_types.is_record(
847851
records_list[0]

odps/models/tests/test_tableio.py

Lines changed: 58 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -824,20 +824,63 @@ def test_write_pandas_with_dynamic_parts(odps, use_arrow):
824824
@pyarrow_case
825825
@pandas_case
826826
@odps2_typed_case
827-
def test_write_pandas_with_complex_type_and_mapping(odps):
827+
def test_write_pandas_with_arrow_complex_type(odps):
828828
if Version(pa.__version__) < Version("1.0.0"):
829829
pytest.skip("casting nested type is not supported in arrow < 1.0.0")
830-
test_table_name = tn("pyodps_t_tmp_write_pd_complex_type")
830+
test_table_name = tn("pyodps_t_tmp_write_arrow_complex_type")
831831
odps.delete_table(test_table_name, if_exists=True)
832832

833-
table = odps.create_table(
834-
test_table_name,
835-
"idx string, list_data array<bigint>, "
836-
"list_struct_data array<struct<name:string, val: bigint>>, "
837-
"map_data map<string, bigint>",
838-
table_properties={"columnar.nested.type": "true"},
839-
lifecycle=1,
833+
data = pd.DataFrame(
834+
[
835+
[
836+
"05ac09c4",
837+
[134, 256],
838+
[None, {"name": "col1", "val": 134}],
839+
],
840+
["cfae9054", [5431], [{"name": "col2", "val": 2345}]],
841+
[
842+
"6029501d",
843+
[145, None, 561],
844+
[{"name": "ddd", "val": 2341}, {"name": None, "val": None}],
845+
],
846+
[
847+
"c653e520",
848+
[7412, 234],
849+
[None, {"name": "uvw", "val": None}],
850+
],
851+
["59caed0d", [295, 1674], None],
852+
],
853+
columns=["idx", "list_data", "list_struct_data"],
840854
)
855+
arrow_data = pa.Table.from_pandas(data)
856+
try:
857+
table_kwargs = {
858+
"table_properties": {"columnar.nested.type": "true"},
859+
}
860+
odps.write_table(
861+
test_table_name,
862+
arrow_data,
863+
create_table=True,
864+
lifecycle=1,
865+
table_kwargs=table_kwargs,
866+
)
867+
table = odps.get_table(test_table_name)
868+
pd.testing.assert_frame_equal(
869+
data.sort_values("idx").reset_index(drop=True),
870+
table.to_pandas().sort_values("idx").reset_index(drop=True),
871+
)
872+
finally:
873+
odps.delete_table(test_table_name, if_exists=True)
874+
875+
876+
@pyarrow_case
877+
@pandas_case
878+
@odps2_typed_case
879+
def test_write_pandas_with_complex_type_and_mapping(odps):
880+
if Version(pa.__version__) < Version("1.0.0"):
881+
pytest.skip("casting nested type is not supported in arrow < 1.0.0")
882+
test_table_name = tn("pyodps_t_tmp_write_pd_complex_type")
883+
odps.delete_table(test_table_name, if_exists=True)
841884

842885
data = pd.DataFrame(
843886
[
@@ -870,19 +913,24 @@ def test_write_pandas_with_complex_type_and_mapping(odps):
870913
"list_struct_data": "array<struct<name:string, val: bigint>>",
871914
"map_data": "map<string, bigint>",
872915
}
916+
table_kwargs = {
917+
"table_properties": {"columnar.nested.type": "true"},
918+
}
873919
odps.write_table(
874920
test_table_name,
875921
data,
876922
type_mapping=type_mapping,
877923
create_table=True,
878924
lifecycle=1,
925+
table_kwargs=table_kwargs,
879926
)
927+
table = odps.get_table(test_table_name)
880928
pd.testing.assert_frame_equal(
881929
data.sort_values("idx").reset_index(drop=True),
882930
table.to_pandas().sort_values("idx").reset_index(drop=True),
883931
)
884932
finally:
885-
table.drop()
933+
odps.delete_table(test_table_name, if_exists=True)
886934

887935

888936
@pyarrow_case

odps/tunnel/io/types.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ def arrow_type_to_odps_type(arrow_type):
125125
elif isinstance(arrow_type, pa.StructType):
126126
fields = [
127127
(arrow_type[idx].name, arrow_type_to_odps_type(arrow_type[idx].type))
128-
for idx in arrow_type.num_fields
128+
for idx in range(arrow_type.num_fields)
129129
]
130130
col_type = types.Struct(fields)
131131
else:

0 commit comments

Comments
 (0)