Skip to content

Commit a529690

Browse files
committed
refs #281, Pandas partially working
Worked towards getting rid of ResultSet entirely and having SQLDriver.execute() return DataFrames directly, with the appropriate attrs set
1 parent c6a8983 commit a529690

File tree

1 file changed

+198
-14
lines changed

1 file changed

+198
-14
lines changed

pysimplesql/pysimplesql.py

+198-14
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,17 @@
168168
DELETE_RETURNED: int = 2 # A result was found
169169
DELETE_ABORTED: int = 4 # The search was aborted, likely during a callback
170170
DELETE_RECURSION_LIMIT_ERROR: int = 8 # We hit max nested levels
171-
DELETE_CASCADE_RECURSION_LIMIT = (
171+
DELETE_CASCADE_RECURSION_LIMIT: int = (
172172
15 # Mysql sets this as 15 when using foreign key CASCADE DELETE
173173
)
174174

175+
# -------
# Sorting
# -------
# Sort states. sort_cycle() returns one of these to report which state the
# cycle landed on after the call.
SORT_NONE: int = 0  # No column sort applied (original row order)
SORT_ASC: int = 1  # Sorted ascending by the selected column
SORT_DESC: int = 2  # Sorted descending by the selected column
181+
175182

176183
# -------
177184
# CLASSES
@@ -538,11 +545,11 @@ def __init__(
538545
self.where_clause: str = "" # In addition to the generated where clause!
539546
self.dependents: list = []
540547
self.column_info: ColumnInfo # ColumnInfo collection
541-
self.rows: Union[ResultSet, None] = None
548+
self.rows: Union[pd.DataFrame, None] = None
542549
self.search_order: List[str] = []
543550
self.selector: List[str] = []
544551
self.callbacks: CallbacksDict = {}
545-
self.transform: Optional[Callable[[ResultSet, int], None]] = None
552+
self.transform: Optional[Callable[[pd.DataFrame, int], None]] = None
546553
self.filtered: bool = filtered
547554
if prompt_save is None:
548555
self._prompt_save = self.frm._prompt_save
@@ -980,7 +987,7 @@ def requery(
980987
not len(self.frm[parent_table].rows.index)
981988
or Relationship.parent_virtual(self.table, self.frm)
982989
):
983-
self.rows = ResultSet([]) # purge rows
990+
self.rows = pd.DataFrame([]) # purge rows
984991
if update_elements:
985992
self.frm.update_elements(self.key)
986993
if requery_dependents:
@@ -996,13 +1003,15 @@ def requery(
9961003
try:
9971004
sort_settings = self.rows.store_sort_settings()
9981005
except AttributeError:
999-
sort_settings = [None, ResultSet.SORT_NONE] # default for first query
1006+
sort_settings = [None, SORT_NONE] # default for first query
10001007

10011008
rows = self.driver.execute(query)
10021009
self.rows = rows
1010+
print(self.rows)
10031011
# now we can restore the sort order
1004-
self.rows.load_sort_settings(sort_settings)
1005-
self.rows.sort(self.table)
1012+
self.load_sort_settings(sort_settings)
1013+
self.sort(self.table)
1014+
10061015
# Perform transform one row at a time
10071016
if self.transform is not None:
10081017
self.rows = self.rows.apply(
@@ -2157,6 +2166,147 @@ def add_simple_transform(self, transforms: SimpleTransformsDict) -> None:
21572166
RuntimeError(f"Transform for {k} must be callable!")
21582167
self._simple_transform[k] = v
21592168

2169+
def purge_virtual(self) -> None:
    """
    Remove every row flagged as virtual from `self.rows`.

    The per-row bookkeeping kept in the DataFrame's attrs ("original_index" and
    "virtual") is trimmed by the same labels so it stays aligned with the
    remaining rows.

    :returns: None
    """
    frame = self.rows
    flags = frame.attrs["virtual"]

    # Boolean-mask the flag Series with itself to get the labels of virtual rows
    doomed = flags[flags].index

    frame.drop(doomed, inplace=True)
    frame.attrs["original_index"] = frame.attrs["original_index"].drop(doomed)
    frame.attrs["virtual"] = frame.attrs["virtual"].drop(doomed)
2181+
2182+
def sort_by_column(self, column: str, table: str, reverse: bool = False) -> None:
    """
    Sort `self.rows` in place by column. Using the mapped relationships of the
    database, foreign keys will automatically sort based on the parent table's
    description column, rather than the foreign key number.

    :param column: The name of the column to sort the DataFrame by
    :param table: The name of the table the column belongs to
    :param reverse: Reverse the sort; False = ASC, True = DESC
    :returns: None
    """
    # Maps a foreign key value to the parent's description value. Stays None
    # when the column is not a foreign key, in which case the column is sorted
    # on its own values.
    description_for_pk = None

    def to_description(values):
        # DataFrame.sort_values() hands `key` the whole column as a Series and
        # expects a Series of the same shape back. Foreign keys without a
        # matching parent row map to NaN.
        return values.map(description_for_pk)

    rels = Relationship.get_relationships(table)
    try:
        # We don't want to sort by foreign keys directly - we want to sort by the
        # description column of the foreign table that the foreign key references
        for rel in rels:
            if column == rel.fk_column:
                parent = rel.frm[rel.parent_table]
                parent_rows = parent.rows
                description_for_pk = dict(
                    zip(
                        parent_rows[rel.pk_column],
                        parent_rows[parent.description_column],
                    )
                )
                break

        # The data lives in self.rows (the DataFrame), not on this object
        self.rows.sort_values(
            by=column,
            key=to_description if description_for_pk is not None else None,
            ascending=not reverse,
            inplace=True,
        )
    except KeyError:
        logger.debug(f"ResultSet could not sort by column {column}. KeyError.")
2228+
2229+
def sort_by_index(self, index: int, table: str, reverse: bool = False) -> None:
    """
    Sort `self.rows` by column index. Using the mapped relationships of the
    database, foreign keys will automatically sort based on the parent table's
    description column, rather than the foreign key number.

    :param index: The positional index of the column to sort by
    :param table: The name of the table the column belongs to
    :param reverse: Reverse the sort; False = ASC, True = DESC
    :returns: None
    """
    # The columns belong to the DataFrame held in self.rows, not to this object
    column = self.rows.columns[index]
    self.sort_by_column(column, table, reverse)
2242+
2243+
def store_sort_settings(self) -> list:
    """
    Snapshot the current sort settings.

    The snapshot is just the sort column and the reverse setting; pass it to
    `load_sort_settings()` to restore them later.

    :returns: A list containing the sort_column and the sort_reverse
    """
    snapshot = [self.sort_column, self.sort_reverse]
    return snapshot
2252+
2253+
def load_sort_settings(self, sort_settings: list) -> None:
    """
    Restore previously stored sort settings. Sort settings are just the sort
    column and the reverse setting.

    :param sort_settings: A list as returned by `store_sort_settings()`
    :returns: None
    """
    # Position 0 holds the sort column, position 1 the reverse flag
    for position, attribute in enumerate(("sort_column", "sort_reverse")):
        setattr(self, attribute, sort_settings[position])
2262+
2263+
def sort_reset(self) -> None:
    """
    Reset the sort order of `self.rows` to the order recorded in its
    "original_index" attr.

    The rows are re-ordered in place; their index labels are left untouched.
    Overwriting `self.rows.index` with the stored index would only relabel the
    rows in their current (sorted) order and break the label-to-row mapping.

    :returns: None
    """
    original = self.rows.attrs["original_index"]
    # Position of each label in the original ordering
    position = pd.Series(range(len(original)), index=original)
    # Labels missing from original_index map to NaN and therefore sort last
    self.rows.sort_index(key=lambda labels: labels.map(position), inplace=True)
2271+
2272+
def sort(self, table: str) -> None:
    """
    Re-apply the sort described by the internal sort_column and sort_reverse
    variables. This is a good way to re-sort without changing the sort_cycle.

    :param table: The table associated with these rows. Passed along to
        `sort_by_column()`
    :returns: None
    """
    if self.sort_column is not None:
        self.sort_by_column(self.sort_column, table, self.sort_reverse)
        return

    # No sort column selected: fall back to the original order
    self.sort_reset()
2285+
2286+
def sort_cycle(self, column: str, table: str) -> int:
    """
    Advance the sort state for a column by one step with each call: ASC by
    column, then DESC by column, then back to the original order.

    :param column: The column name to cycle the sort on
    :param table: The table that the column belongs to
    :returns: A sort constant; SORT_NONE, SORT_ASC, or SORT_DESC
    """
    if column != self.sort_column:
        # A different column was picked: start its cycle at ascending
        next_column, next_reverse, outcome = column, False, SORT_ASC
    elif self.sort_reverse:
        # Already descending on this column: return to the original order
        next_column, next_reverse, outcome = None, False, SORT_NONE
    else:
        # Ascending on this column: flip to descending
        next_column, next_reverse, outcome = self.sort_column, True, SORT_DESC

    self.sort_column = next_column
    self.sort_reverse = next_reverse
    self.sort(table)
    return outcome
2309+
21602310

21612311
class Form:
21622312

@@ -5082,7 +5232,7 @@ def __init__(
50825232
def __call__(self, column):
50835233
# store the pk:
50845234
pk = self.frm[self.data_key].get_current_pk()
5085-
sort_order = self.frm[self.data_key].rows.sort_cycle(column, self.data_key)
5235+
sort_order = self.frm[self.data_key].sort_cycle(column, self.data_key)
50865236
# We only need to update the selectors not all elements,
50875237
# so first set by the primary key, then update_selectors()
50885238
self.frm[self.data_key].set_by_pk(
@@ -5829,7 +5979,40 @@ def _get_list(self, key: str) -> List:
58295979
# return a generic ResultSet instance, which contains a collection of generic ResultRow
58305980
# instances.
58315981
# --------------------------------------------------------------------------------------
5832-
class ResultSet(pd.DataFrame):
5982+
class Result:
    """
    This is a "dummy" Result object that is a convenience for constructing a DataFrame
    that has the expected attrs set.
    """

    @classmethod
    def set(
        cls,
        row_data: List[dict],
        lastrowid: Optional[int] = None,
        exception: Optional[Exception] = None,
        column_info: Optional["ColumnInfo"] = None,
    ) -> pd.DataFrame:
        """
        Create a pandas DataFrame with the row data and expected attrs set.

        The returned DataFrame carries these attrs: "lastrowid", "exception",
        "original_index", "column_info" and "virtual".

        :param row_data: A list of dicts of row data
        :param lastrowid: The inserted row ID from the last INSERT statement
        :param exception: Exceptions passed back from the SQLDriver
        :param column_info: An optional ColumnInfo object
        :returns: The populated DataFrame
        """
        df = pd.DataFrame(row_data)
        df.attrs["lastrowid"] = lastrowid
        df.attrs["exception"] = exception
        df.attrs["original_index"] = df.index.copy()  # Store the original index
        df.attrs["column_info"] = column_info
        # Store virtual flags for each row. The dtype is pinned so that an empty
        # result still yields a boolean Series usable as a mask.
        df.attrs["virtual"] = pd.Series(False, index=df.index, dtype=bool)
        return df
6013+
6014+
6015+
class ResultSet2(pd.DataFrame):
58336016
"""
58346017
The ResultSet class is a generic result class so that working with the resultset of
58356018
the different supported databases behave in a consistent manner. A `ResultSet` is a
@@ -5924,7 +6107,7 @@ def insert_row(self, row: dict, idx: int = None) -> None:
59246107

59256108
def purge_virtual(self) -> None:
59266109
"""
5927-
Purge virtual rows from the `ResultSet`.
6110+
Purge virtual rows from the DataFrame.
59286111
59296112
:returns: None
59306113
"""
@@ -6685,7 +6868,9 @@ def execute(
66856868
rows = []
66866869

66876870
lastrowid = cursor.lastrowid if cursor.lastrowid is not None else None
6688-
return ResultSet([dict(row) for row in rows], lastrowid, exception, column_info)
6871+
return Result.set(
6872+
[dict(row) for row in rows], lastrowid, exception, column_info
6873+
)
66896874

66906875
def close(self):
66916876
# Only do cleanup if this is not an imported database
@@ -6729,9 +6914,8 @@ def column_info(self, table):
67296914

67306915
def pk_column(self, table):
    """
    Look up the primary key column name of a table via `PRAGMA table_info`.

    :param table: The name of the table to inspect
    :returns: The name of the column whose "pk" value is 1, or None if the
        result has no such row
    """
    q = f"PRAGMA table_info({self.quote_table(table)})"
    result = self.execute(q, silent=True)

    # An empty result has none of the expected columns to filter on
    if "pk" not in result.columns or "name" not in result.columns:
        return None

    names = result.loc[result["pk"] == 1, "name"]
    # A table without a primary key yields no match; report that as None
    # rather than raising IndexError
    return names.iloc[0] if not names.empty else None
67356919

67366920
def relationships(self):
67376921
# Return a list of dicts {from_table,to_table,from_column,to_column,requery}

0 commit comments

Comments
 (0)