From 6cf0e214617f8b13c5a33e66e8c49f4fe931dce2 Mon Sep 17 00:00:00 2001
From: beanan
Date: Sun, 4 Oct 2020 12:23:13 +0800
Subject: [PATCH 1/6] DOC: black user_guide/10min.rst code style

---
 doc/source/user_guide/10min.rst | 101 ++++++++++++++++----------------
 1 file changed, 51 insertions(+), 50 deletions(-)

diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
index 673f8689736f1..8270b2ee49bd8 100644
--- a/doc/source/user_guide/10min.rst
+++ b/doc/source/user_guide/10min.rst
@@ -34,9 +34,9 @@ and labeled columns:

 .. ipython:: python

-    dates = pd.date_range('20130101', periods=6)
+    dates = pd.date_range("20130101", periods=6)
     dates
-    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
+    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
     df

 Creating a :class:`DataFrame` by passing a dict of objects that can be converted to series-like.
@@ -156,7 +156,7 @@ Sorting by values:

 .. ipython:: python

-    df.sort_values(by='B')
+    df.sort_values(by="B")

 Selection
 ---------
@@ -178,14 +178,14 @@ equivalent to ``df.A``:

 .. ipython:: python

-    df['A']
+    df["A"]

 Selecting via ``[]``, which slices the rows.

 .. ipython:: python

     df[0:3]
-    df['20130102':'20130104']
+    df["20130102":"20130104"]

 Selection by label
 ~~~~~~~~~~~~~~~~~~
@@ -202,31 +202,31 @@ Selecting on a multi-axis by label:

 .. ipython:: python

-    df.loc[:, ['A', 'B']]
+    df.loc[:, ["A", "B"]]

 Showing label slicing, both endpoints are *included*:

 .. ipython:: python

-    df.loc['20130102':'20130104', ['A', 'B']]
+    df.loc["20130102":"20130104", ["A", "B"]]

 Reduction in the dimensions of the returned object:

 .. ipython:: python

-    df.loc['20130102', ['A', 'B']]
+    df.loc["20130102", ["A", "B"]]

 For getting a scalar value:

 .. ipython:: python

-    df.loc[dates[0], 'A']
+    df.loc[dates[0], "A"]

 For getting fast access to a scalar (equivalent to the prior method):

 .. ipython:: python

-    df.at[dates[0], 'A']
+    df.at[dates[0], "A"]

 Selection by position
 ~~~~~~~~~~~~~~~~~~~~~
@@ -282,7 +282,7 @@ Using a single column's values to select data.

 .. ipython:: python

-    df[df['A'] > 0]
+    df[df["A"] > 0]

 Selecting values from a DataFrame where a boolean condition is met.

@@ -295,9 +295,9 @@ Using the :func:`~Series.isin` method for filtering:
 .. ipython:: python

     df2 = df.copy()
-    df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
+    df2["E"] = ["one", "one", "two", "three", "four", "three"]
     df2
-    df2[df2['E'].isin(['two', 'four'])]
+    df2[df2["E"].isin(["two", "four"])]

 Setting
 ~~~~~~~
@@ -307,15 +307,15 @@ by the indexes.

 .. ipython:: python

-    s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
+    s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20130102", periods=6))
     s1
-    df['F'] = s1
+    df["F"] = s1

 Setting values by label:

 .. ipython:: python

-    df.at[dates[0], 'A'] = 0
+    df.at[dates[0], "A"] = 0

 Setting values by position:

@@ -327,7 +327,7 @@ Setting by assigning with a NumPy array:

 .. ipython:: python

-    df.loc[:, 'D'] = np.array([5] * len(df))
+    df.loc[:, "D"] = np.array([5] * len(df))

 The result of the prior setting operations.

@@ -356,15 +356,15 @@ returns a copy of the data.

 .. ipython:: python

-    df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
-    df1.loc[dates[0]:dates[1], 'E'] = 1
+    df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ["E"])
+    df1.loc[dates[0] : dates[1], "E"] = 1
     df1

 To drop any rows that have missing data.

 .. ipython:: python

-    df1.dropna(how='any')
+    df1.dropna(how="any")

 Filling missing data.
@@ -408,7 +408,7 @@ In addition, pandas automatically broadcasts along the specified dimension.

     s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
     s
-    df.sub(s, axis='index')
+    df.sub(s, axis="index")


 Apply
@@ -444,7 +444,7 @@ some cases always uses them). See more at :ref:`Vectorized String Methods

 .. ipython:: python

-    s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
+    s = pd.Series(["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"])
     s.str.lower()

 Merge
@@ -486,21 +486,21 @@ SQL style merges. See the :ref:`Database style joining <merging.join>` section.

 .. ipython:: python

-    left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
-    right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
+    left = pd.DataFrame({"key": ["foo", "foo"], "lval": [1, 2]})
+    right = pd.DataFrame({"key": ["foo", "foo"], "rval": [4, 5]})
     left
     right
-    pd.merge(left, right, on='key')
+    pd.merge(left, right, on="key")

 Another example that can be given is:

 .. ipython:: python

-    left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
-    right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
+    left = pd.DataFrame({"key": ["foo", "bar"], "lval": [1, 2]})
+    right = pd.DataFrame({"key": ["foo", "bar"], "rval": [4, 5]})
     left
     right
-    pd.merge(left, right, on='key')
+    pd.merge(left, right, on="key")

 Grouping
 --------
@@ -531,14 +531,14 @@ groups.

 .. ipython:: python

-    df.groupby('A').sum()
+    df.groupby("A").sum()

 Grouping by multiple columns forms a hierarchical index, and again we can
 apply the :meth:`~pandas.core.groupby.GroupBy.sum` function.

 .. ipython:: python

-    df.groupby(['A', 'B']).sum()
+    df.groupby(["A", "B"]).sum()

 Reshaping
 ---------
@@ -559,8 +559,8 @@ Stack
             ]
         )
     )
-    index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
-    df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
+    index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
+    df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])
     df2 = df[:4]
     df2
@@ -603,7 +603,7 @@ We can produce pivot tables from this data very easily:

 .. ipython:: python

-    pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
+    pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"])


 Time series
@@ -616,31 +616,31 @@ financial applications. See the :ref:`Time Series section <timeseries>`.

 .. ipython:: python

-    rng = pd.date_range('1/1/2012', periods=100, freq='S')
+    rng = pd.date_range("1/1/2012", periods=100, freq="S")
     ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
-    ts.resample('5Min').sum()
+    ts.resample("5Min").sum()

 Time zone representation:

 .. ipython:: python

-    rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')
+    rng = pd.date_range("3/6/2012 00:00", periods=5, freq="D")
     ts = pd.Series(np.random.randn(len(rng)), rng)
     ts
-    ts_utc = ts.tz_localize('UTC')
+    ts_utc = ts.tz_localize("UTC")
     ts_utc

 Converting to another time zone:

 .. ipython:: python

-    ts_utc.tz_convert('US/Eastern')
+    ts_utc.tz_convert("US/Eastern")

 Converting between time span representations:

 .. ipython:: python

-    rng = pd.date_range('1/1/2012', periods=5, freq='M')
+    rng = pd.date_range("1/1/2012", periods=5, freq="M")
     ts = pd.Series(np.random.randn(len(rng)), index=rng)
     ts
     ps = ts.to_period()
@@ -654,9 +654,9 @@ the quarter end:

 .. ipython:: python

-    prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
+    prng = pd.period_range("1990Q1", "2000Q4", freq="Q-NOV")
     ts = pd.Series(np.random.randn(len(prng)), prng)
-    ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
+    ts.index = (prng.asfreq("M", "e") + 1).asfreq("H", "s") + 9
     ts.head()

 Categoricals
 ------------
@@ -754,19 +754,20 @@ CSV

 .. ipython:: python

-    df.to_csv('foo.csv')
+    df.to_csv("foo.csv")

 :ref:`Reading from a csv file. `

 .. ipython:: python

-    pd.read_csv('foo.csv')
+    pd.read_csv("foo.csv")

 .. ipython:: python
    :suppress:

     import os
-    os.remove('foo.csv')
+
+    os.remove("foo.csv")

 HDF5
 ~~~~
@@ -777,18 +778,18 @@ Writing to a HDF5 Store.

 .. ipython:: python

-    df.to_hdf('foo.h5', 'df')
+    df.to_hdf("foo.h5", "df")

 Reading from a HDF5 Store.

 .. ipython:: python

-    pd.read_hdf('foo.h5', 'df')
+    pd.read_hdf("foo.h5", "df")

 .. ipython:: python
    :suppress:

-    os.remove('foo.h5')
+    os.remove("foo.h5")

 Excel
 ~~~~~
@@ -799,18 +800,18 @@ Writing to an excel file.

 .. ipython:: python

-    df.to_excel('foo.xlsx', sheet_name='Sheet1')
+    df.to_excel("foo.xlsx", sheet_name="Sheet1")

 Reading from an excel file.

 .. ipython:: python

-    pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
+    pd.read_excel("foo.xlsx", "Sheet1", index_col=None, na_values=["NA"])

 .. ipython:: python
    :suppress:

-    os.remove('foo.xlsx')
+    os.remove("foo.xlsx")

 Gotchas
 -------

From 66527b1f24059718352efbb6eb7bba73d96aacbe Mon Sep 17 00:00:00 2001
From: beanan
Date: Sun, 4 Oct 2020 12:35:13 +0800
Subject: [PATCH 2/6] DOC: black user_guide/enhancingperf.rst code style

---
 doc/source/user_guide/enhancingperf.rst | 119 +++++++++++++-----------
 1 file changed, 63 insertions(+), 56 deletions(-)

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index ce9db0a5279c3..51caf911e328a 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -48,10 +48,14 @@ We have a ``DataFrame`` to which we want to apply a function row-wise.

 .. ipython:: python

-    df = pd.DataFrame({'a': np.random.randn(1000),
-                       'b': np.random.randn(1000),
-                       'N': np.random.randint(100, 1000, (1000)),
-                       'x': 'x'})
+    df = pd.DataFrame(
+        {
+            "a": np.random.randn(1000),
+            "b": np.random.randn(1000),
+            "N": np.random.randint(100, 1000, (1000)),
+            "x": "x",
+        }
+    )
     df

 Here's the function in pure Python:

 .. ipython:: python

     def f(x):
         return x * (x - 1)

+
     def integrate_f(a, b, N):
         s = 0
         dx = (b - a) / N
@@ -81,7 +86,7 @@ four calls) using the `prun ipython magic function
-    %timeit pd.eval('(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')
+    %timeit pd.eval("(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)")

 :func:`~pandas.eval` also works with unaligned pandas objects:
@@ -560,7 +563,7 @@ Now let's do the same thing but with comparisons:

 .. ipython:: python

-    %timeit pd.eval('df1 + df2 + df3 + df4 + s')
+    %timeit pd.eval("df1 + df2 + df3 + df4 + s")

 .. note::

@@ -587,19 +590,19 @@ evaluate an expression in the "context" of a :class:`~pandas.DataFrame`.
    :suppress:

     try:
-       del a
+        del a
     except NameError:
-       pass
+        pass

     try:
-       del b
+        del b
     except NameError:
-       pass
+        pass

 .. ipython:: python

-    df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b'])
-    df.eval('a + b')
+    df = pd.DataFrame(np.random.randn(5, 2), columns=["a", "b"])
+    df.eval("a + b")

 Any expression that is a valid :func:`pandas.eval` expression is also a valid
 :meth:`DataFrame.eval` expression, with the added benefit that you don't have to
@@ -617,9 +620,9 @@ on the original ``DataFrame`` or return a copy with the new column.

 .. ipython:: python

     df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-    df.eval('c = a + b', inplace=True)
-    df.eval('d = a + b + c', inplace=True)
-    df.eval('a = 1', inplace=True)
+    df.eval("c = a + b", inplace=True)
+    df.eval("d = a + b + c", inplace=True)
+    df.eval("a = 1", inplace=True)
     df

 When ``inplace`` is set to ``False``, the default, a copy of the ``DataFrame`` with the
@@ -628,7 +631,7 @@ new or modified columns is returned and the original frame is unchanged.

 .. ipython:: python

     df
-    df.eval('e = a - c', inplace=False)
+    df.eval("e = a - c", inplace=False)
     df

 As a convenience, multiple assignments can be performed by using a
@@ -636,19 +639,22 @@ multi-line string.

 .. ipython:: python

-    df.eval("""
+    df.eval(
+        """
     c = a + b
     d = a + b + c
-    a = 1""", inplace=False)
+    a = 1""",
+        inplace=False,
+    )

 The equivalent in standard Python would be

 .. ipython:: python

     df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-    df['c'] = df['a'] + df['b']
-    df['d'] = df['a'] + df['b'] + df['c']
-    df['a'] = 1
+    df["c"] = df["a"] + df["b"]
+    df["d"] = df["a"] + df["b"] + df["c"]
+    df["a"] = 1
     df

 The ``query`` method has a ``inplace`` keyword which determines
@@ -657,8 +663,8 @@ whether the query modifies the original frame.

 .. ipython:: python

     df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-    df.query('a > 2')
-    df.query('a > 2', inplace=True)
+    df.query("a > 2")
+    df.query("a > 2", inplace=True)
     df

 Local variables
 ~~~~~~~~~~~~~~~
@@ -669,10 +675,10 @@ expression by placing the ``@`` character in front of the name. For example,

 .. ipython:: python

-    df = pd.DataFrame(np.random.randn(5, 2), columns=list('ab'))
+    df = pd.DataFrame(np.random.randn(5, 2), columns=list("ab"))
     newcol = np.random.randn(len(df))
-    df.eval('b + @newcol')
-    df.query('b < @newcol')
+    df.eval("b + @newcol")
+    df.query("b < @newcol")

 If you don't prefix the local variable with ``@``, pandas will raise an
 exception telling you the variable is undefined.
@@ -685,8 +691,8 @@ name in an expression.

 .. ipython:: python

     a = np.random.randn()
-    df.query('@a < a')
-    df.loc[a < df['a']]  # same as the previous expression
+    df.query("@a < a")
+    df.loc[a < df["a"]]  # same as the previous expression

 With :func:`pandas.eval` you cannot use the ``@`` prefix *at all*, because it
 isn't defined in that context. ``pandas`` will let you know this if you try to
@@ -696,14 +702,14 @@ use ``@`` in a top-level call to :func:`pandas.eval`. For example,
    :okexcept:

     a, b = 1, 2
-    pd.eval('@a + b')
+    pd.eval("@a + b")

 In this case, you should simply refer to the variables like you would in
 standard Python.

 .. ipython:: python

-    pd.eval('a + b')
+    pd.eval("a + b")


 :func:`pandas.eval` parsers
@@ -723,10 +729,10 @@ semantics.

 .. ipython:: python

-    expr = '(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)'
-    x = pd.eval(expr, parser='python')
-    expr_no_parens = 'df1 > 0 & df2 > 0 & df3 > 0 & df4 > 0'
-    y = pd.eval(expr_no_parens, parser='pandas')
+    expr = "(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)"
+    x = pd.eval(expr, parser="python")
+    expr_no_parens = "df1 > 0 & df2 > 0 & df3 > 0 & df4 > 0"
+    y = pd.eval(expr_no_parens, parser="pandas")
     np.all(x == y)


@@ -735,10 +741,10 @@ well:

 .. ipython:: python

-    expr = '(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)'
-    x = pd.eval(expr, parser='python')
-    expr_with_ands = 'df1 > 0 and df2 > 0 and df3 > 0 and df4 > 0'
-    y = pd.eval(expr_with_ands, parser='pandas')
+    expr = "(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)"
+    x = pd.eval(expr, parser="python")
+    expr_with_ands = "df1 > 0 and df2 > 0 and df3 > 0 and df4 > 0"
+    y = pd.eval(expr_with_ands, parser="pandas")
     np.all(x == y)


@@ -768,7 +774,7 @@ is a bit slower (not by much) than evaluating the same expression in Python

 .. ipython:: python

-    %timeit pd.eval('df1 + df2 + df3 + df4', engine='python')
+    %timeit pd.eval("df1 + df2 + df3 + df4", engine="python")


 :func:`pandas.eval` performance
@@ -812,8 +818,9 @@ you have an expression--for example

 .. ipython:: python

-    df = pd.DataFrame({'strings': np.repeat(list('cba'), 3),
-                       'nums': np.repeat(range(3), 3)})
+    df = pd.DataFrame(
+        {"strings": np.repeat(list("cba"), 3), "nums": np.repeat(range(3), 3)}
+    )
     df
     df.query('strings == "a" and nums == 1')

From 3aebca2ad6066ae6fc4ff1516e76c60794300ed5 Mon Sep 17 00:00:00 2001
From: beanan
Date: Sun, 4 Oct 2020 13:05:43 +0800
Subject: [PATCH 3/6] DOC: Adjust enhancingperf.rst code style

---
 doc/source/user_guide/enhancingperf.rst | 28 ++++++++++++-------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index 51caf911e328a..d388053bf31c0 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -77,7 +77,7 @@ We achieve our result by using ``apply`` (row-wise):

 .. code-block:: ipython

-   In [7]: %timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)
+   In [7]: %timeit df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1)
    10 loops, best of 3: 174 ms per loop

 But clearly this isn't fast enough for us. Let's take a look and see where the
@@ -128,7 +128,7 @@ is here to distinguish between function versions):

 .. code-block:: ipython

-   In [4]: %timeit df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1)
+   In [4]: %timeit df.apply(lambda x: integrate_f_plain(x["a"], x["b"], x["N"]), axis=1)
    10 loops, best of 3: 85.5 ms per loop

 Already this has shaved a third off, not too bad for a simple copy and paste.
@@ -157,7 +157,7 @@ We get another huge improvement simply by providing type information:

 .. code-block:: ipython

-   In [4]: %timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1)
+   In [4]: %timeit df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1)
    10 loops, best of 3: 20.3 ms per loop

 Now, we're talking! It's now over ten times faster than the original python
@@ -240,9 +240,9 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra

 .. code-block:: ipython

-   In [4]: %timeit apply_integrate_f(df['a'].to_numpy(),
-                                     df['b'].to_numpy(),
-                                     df['N'].to_numpy())
+   In [4]: %timeit apply_integrate_f(
+               df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
+           )
    1000 loops, best of 3: 1.25 ms per loop

 We've gotten another big improvement. Let's check again where the time is spent:
@@ -296,9 +296,9 @@ advanced Cython techniques:

 .. code-block:: ipython

-   In [4]: %timeit apply_integrate_f_wrap(df['a'].to_numpy(),
-                                          df['b'].to_numpy(),
-                                          df['N'].to_numpy())
+   In [4]: %timeit apply_integrate_f_wrap(
+               df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
+           )
    1000 loops, best of 3: 987 us per loop

 Even faster, with the caveat that a bug in our Cython code (an off-by-one error,
@@ -406,15 +406,15 @@ Consider the following toy example of doubling each observation:

 .. code-block:: ipython

    # Custom function without numba
-   In [5]: %timeit df['col1_doubled'] = df['a'].apply(double_every_value_nonumba)  # noqa E501
+   In [5]: %timeit df["col1_doubled"] = df["a"].apply(double_every_value_nonumba)  # noqa E501
    1000 loops, best of 3: 797 us per loop

    # Standard implementation (faster than a custom function)
-   In [6]: %timeit df['col1_doubled'] = df['a'] * 2
+   In [6]: %timeit df["col1_doubled"] = df["a"] * 2
    1000 loops, best of 3: 233 us per loop

    # Custom function with numba
-   In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df['a'].to_numpy())
+   In [7]: %timeit df["col1_doubled"] = double_every_value_withnumba(df["a"].to_numpy())
    1000 loops, best of 3: 145 us per loop

 Caveats
@@ -490,7 +490,7 @@ These operations are supported by :func:`pandas.eval`:
 * ``list`` and ``tuple`` literals, e.g., ``[1, 2]`` or ``(1, 2)``
 * Attribute access, e.g., ``df.a``
 * Subscript expressions, e.g., ``df[0]``
-* Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful)
+* Simple variable evaluation, e.g., ``pd.eval("df")`` (this is not very useful)
 * Math functions: ``sin``, ``cos``, ``exp``, ``log``, ``expm1``, ``log1p``,
   ``sqrt``, ``sinh``, ``cosh``, ``tanh``, ``arcsin``, ``arccos``, ``arctan``, ``arccosh``,
   ``arcsinh``, ``arctanh``, ``abs``, ``arctan2`` and ``log10``.
@@ -822,7 +822,7 @@ you have an expression--for example

         {"strings": np.repeat(list("cba"), 3), "nums": np.repeat(range(3), 3)}
     )
     df
-    df.query('strings == "a" and nums == 1')
+    df.query("strings == "a" and nums == 1")

 the numeric part of the comparison (``nums == 1``) will be evaluated by
 ``numexpr``.

From 1822b526a22e5044922906102f4d4e0c82413f4d Mon Sep 17 00:00:00 2001
From: beanan
Date: Sun, 4 Oct 2020 22:59:07 +0800
Subject: [PATCH 4/6] DOC: Remove extra line breaks

---
 doc/source/user_guide/enhancingperf.rst | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index d388053bf31c0..e6bd2a3b5518f 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -240,18 +240,14 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra

 .. code-block:: ipython

-   In [4]: %timeit apply_integrate_f(
-               df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
-           )
+   In [4]: %timeit apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())
    1000 loops, best of 3: 1.25 ms per loop

 We've gotten another big improvement. Let's check again where the time is spent:

 .. ipython:: python

-   %%prun -l 4 apply_integrate_f(
-       df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
-   )
+   %%prun -l 4 apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())

 As one might expect, the majority of the time is now spent in ``apply_integrate_f``,
 so if we wanted to make anymore efficiencies we must continue to concentrate our
@@ -296,9 +292,7 @@ advanced Cython techniques:

 .. code-block:: ipython

-   In [4]: %timeit apply_integrate_f_wrap(
-               df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
-           )
+   In [4]: %timeit apply_integrate_f_wrap(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())
    1000 loops, best of 3: 987 us per loop

 Even faster, with the caveat that a bug in our Cython code (an off-by-one error,

From 6657d4f338c2bf6f526158340c9f5e9104525020 Mon Sep 17 00:00:00 2001
From: beanan
Date: Sun, 4 Oct 2020 23:48:29 +0800
Subject: [PATCH 5/6] add E203 to setup.cfg[flake8-rst]

---
 setup.cfg | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 73986f692b6cd..8702e903d825b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -39,7 +39,8 @@ bootstrap =
     import pandas as pd
     np # avoiding error when importing again numpy or pandas
     pd # (in some cases we want to do it to show users)
-ignore = E402, # module level import not at top of file
+ignore = E203, # space before : (needed for how black formats slicing)
+    E402, # module level import not at top of file
     W503, # line break before binary operator
     # Classes/functions in different blocks can generate those errors
     E302, # expected 2 blank lines, found 0

From c47ffc62e7d854c6f8b82499af7fec75fcca9697 Mon Sep 17 00:00:00 2001
From: beanan
Date: Mon, 5 Oct 2020 00:54:52 +0800
Subject: [PATCH 6/6] fix quote error

---
 doc/source/user_guide/enhancingperf.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index e6bd2a3b5518f..d30554986607d 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -816,7 +816,7 @@ you have an expression--for example

         {"strings": np.repeat(list("cba"), 3), "nums": np.repeat(range(3), 3)}
     )
     df
-    df.query("strings == "a" and nums == 1")
+    df.query("strings == 'a' and nums == 1")

 the numeric part of the comparison (``nums == 1``) will be evaluated by
 ``numexpr``.