Skip to content

Commit e52e8f8

Browse files
kiraksi and chalmerlowe authored
feat: add 'columns' as an alias for 'col_order' (#701)
* feat: add 'columns' as an alias for 'col_order'
* Added test to test alias correctness
* reformatted with black
* refactored to alias checking and testing
* Reformatted tests for columns alias
* feat: add 'columns' as an alias for 'col_order'
* Added test to test alias correctness
* reformatted with black
* refactored to alias checking and testing
* Reformatted tests for columns alias
* Made col_order a keyword argument and added to-do
* Edit todo comment
* Fixed small error in docstring
* Fixed valueerror message
* reformatted with black

---------

Co-authored-by: Chalmer Lowe <[email protected]>
1 parent 5a558e4 commit e52e8f8

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

docs/reading.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ destination DataFrame as well as a preferred column order as follows:
2828
'SELECT * FROM `test_dataset.test_table`',
2929
project_id=projectid,
3030
index_col='index_column_name',
31-
col_order=['col1', 'col2', 'col3'])
31+
columns=['col1', 'col2'])
3232
3333
Querying with legacy SQL syntax
3434
-------------------------------

pandas_gbq/gbq.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,7 @@ def read_gbq(
734734
query_or_table,
735735
project_id=None,
736736
index_col=None,
737-
col_order=None,
737+
columns=None,
738738
reauth=False,
739739
auth_local_webserver=True,
740740
dialect=None,
@@ -750,6 +750,8 @@ def read_gbq(
750750
auth_redirect_uri=None,
751751
client_id=None,
752752
client_secret=None,
753+
*,
754+
col_order=None,
753755
):
754756
r"""Load data from Google BigQuery using google-cloud-python
755757
@@ -773,7 +775,7 @@ def read_gbq(
773775
the environment.
774776
index_col : str, optional
775777
Name of result column to use for index in results DataFrame.
776-
col_order : list(str), optional
778+
columns : list(str), optional
777779
List of BigQuery column names in the desired order for results
778780
DataFrame.
779781
reauth : boolean, default False
@@ -888,6 +890,8 @@ def read_gbq(
888890
client_secret : str
889891
The Client Secret associated with the Client ID for the Google Cloud Project
890892
the user is attempting to connect to.
893+
col_order : list(str), optional
894+
Alias for columns, retained for backwards compatibility.
891895
892896
Returns
893897
-------
@@ -966,10 +970,19 @@ def read_gbq(
966970
'Index column "{0}" does not exist in DataFrame.'.format(index_col)
967971
)
968972

973+
# Using columns as an alias for col_order, raising an error if both provided
974+
if col_order and not columns:
975+
columns = col_order
976+
elif col_order and columns:
977+
raise ValueError(
978+
"Must specify either columns (preferred) or col_order, not both"
979+
)
980+
969981
# Change the order of columns in the DataFrame based on provided list
970-
if col_order is not None:
971-
if sorted(col_order) == sorted(final_df.columns):
972-
final_df = final_df[col_order]
982+
# TODO(kiraksi): allow columns to be a subset of all columns in the table, with follow up PR
983+
if columns is not None:
984+
if sorted(columns) == sorted(final_df.columns):
985+
final_df = final_df[columns]
973986
else:
974987
raise InvalidColumnOrder("Column order does not match this DataFrame.")
975988

tests/system/test_gbq.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,40 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id):
600600
)
601601
assert df["max_year"][0] >= 2000
602602

603+
def test_columns_as_alias(self, project_id):
604+
query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
605+
columns = ["string_2", "string_1", "string_3"]
606+
607+
df = gbq.read_gbq(
608+
query,
609+
project_id=project_id,
610+
columns=columns,
611+
credentials=self.credentials,
612+
dialect="standard",
613+
)
614+
615+
expected = DataFrame({"string_1": ["a"], "string_2": ["b"], "string_3": ["c"]})[
616+
columns
617+
]
618+
619+
# Verify that the returned DataFrame (df) matches the expected DataFrame
620+
tm.assert_frame_equal(df, expected)
621+
622+
def test_columns_and_col_order_raises_error(self, project_id):
623+
query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
624+
columns = ["string_2", "string_1"]
625+
col_order = ["string_3", "string_1", "string_2"]
626+
627+
with pytest.raises(ValueError):
628+
gbq.read_gbq(
629+
query,
630+
project_id=project_id,
631+
columns=columns,
632+
col_order=col_order,
633+
credentials=self.credentials,
634+
dialect="standard",
635+
)
636+
603637

604638
class TestToGBQIntegration(object):
605639
@pytest.fixture(autouse=True, scope="function")

0 commit comments

Comments
 (0)