diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 0988cd7ff0dde..b2db2a2934e16 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -7,11 +7,8 @@ from libc.stdlib cimport malloc, free
 
 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray,
-                    int64_t,
-                    PyArray_SETITEM,
-                    PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew,
-                    flatiter)
+from numpy cimport ndarray, int64_t
+
 cnp.import_array()
 
 from pandas._libs cimport util
@@ -45,16 +42,11 @@ cdef class Reducer:
         n, k = (<object>arr).shape
 
         if axis == 0:
-            if not arr.flags.f_contiguous:
-                arr = arr.copy('F')
-
             self.nresults = k
             self.chunksize = n
             self.increment = n * arr.dtype.itemsize
         else:
-            if not arr.flags.c_contiguous:
-                arr = arr.copy('C')
-
+            arr = arr.T
             self.nresults = n
             self.chunksize = k
             self.increment = k * arr.dtype.itemsize
@@ -62,6 +54,7 @@ cdef class Reducer:
         self.f = f
         self.arr = arr
         self.labels = labels
+
         self.dummy, self.typ, self.index, self.ityp = self._check_dummy(
             dummy=dummy)
 
@@ -92,34 +85,25 @@ cdef class Reducer:
 
     def get_result(self):
         cdef:
-            char* dummy_buf
-            ndarray arr, result, chunk
+            ndarray arr, result
             Py_ssize_t i
-            flatiter it
             object res, name, labels
             object cached_typ = None
 
         arr = self.arr
-        chunk = self.dummy
-        dummy_buf = chunk.data
-        chunk.data = arr.data
         labels = self.labels
 
         result = np.empty(self.nresults, dtype='O')
-        it = <flatiter>PyArray_IterNew(result)
 
-        try:
-            for i in range(self.nresults):
-
-                # create the cached type
-                # each time just reassign the data
+        with np.nditer([arr, result], flags=["reduce_ok", "external_loop", "refs_ok"], op_flags=[["readonly"], ["readwrite"]], order="F") as it:
+            for i, (x, y) in enumerate(it):
                 if i == 0:
 
                     if self.typ is not None:
                         # In this case, we also have self.index
                         name = labels[i]
                         cached_typ = self.typ(
-                            chunk, index=self.index, name=name, dtype=arr.dtype)
+                            x, index=self.index, name=name, dtype=arr.dtype)
 
                 # use the cached_typ if possible
                 if cached_typ is not None:
@@ -127,25 +111,20 @@ cdef class Reducer:
                     name = labels[i]
 
                     object.__setattr__(
-                        cached_typ._mgr._block, 'values', chunk)
+                        cached_typ._mgr._block, 'values', x)
                     object.__setattr__(cached_typ, 'name', name)
                     res = self.f(cached_typ)
                 else:
-                    res = self.f(chunk)
+                    res = self.f(x)
 
                 # TODO: reason for not squeezing here?
                 res = _extract_result(res, squeeze=False)
                 if i == 0:
                     # On the first pass, we check the output shape to see
                     # if this looks like a reduction.
-                    _check_result_array(res, len(self.dummy))
+                    _check_result_array(res, len(x))
 
-                PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
-                chunk.data = chunk.data + self.increment
-                PyArray_ITER_NEXT(it)
-        finally:
-            # so we don't free the wrong memory
-            chunk.data = dummy_buf
+                y[...] = res
 
         result = maybe_convert_objects(result)
         return result
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index c336e5f990f9a..395160b3daec2 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -218,7 +218,7 @@ cdef convert_to_timedelta64(object ts, object unit):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
+def array_to_timedelta64(object values, unit='ns', errors='raise'):
     """
    Convert an ndarray to an array of timedeltas. If errors == 'coerce',
    coerce non-convertible objects to NaT. Otherwise, raise.
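# ----------------------------------------------------------------------
# Note (not part of the patch): the rewritten get_result relies on
# np.nditer's reduction iteration. Below is a minimal, standalone sketch
# of that pattern under the same flags the patch uses; reduce_columns
# and the np.sum call are illustrative stand-ins for Reducer and self.f,
# not pandas code. Broadcasting result (shape (k,)) against arr (shape
# (n, k)) with order="F" and "external_loop" hands the loop one full
# column of arr as `x` per step, with `y` a broadcast view aliasing the
# single result slot for that column; "reduce_ok" permits the writable
# zero-stride operand and "refs_ok" the object-dtype output.

import numpy as np

def reduce_columns(arr, func):
    n, k = arr.shape
    result = np.empty(k, dtype="O")
    with np.nditer([arr, result],
                   flags=["reduce_ok", "external_loop", "refs_ok"],
                   op_flags=[["readonly"], ["readwrite"]],
                   order="F") as it:
        for x, y in it:
            # x: one column of arr; y: view into the matching result slot
            y[...] = func(x)
    return result

print(reduce_columns(np.arange(6.0).reshape(3, 2), np.sum))  # [6.0 9.0]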