|
17 | 17 |
|
18 | 18 | from pandas.util._decorators import cache_readonly
|
19 | 19 |
|
20 |
| -from pandas.core.dtypes.common import ( |
21 |
| - is_bool, |
22 |
| - is_iterator, |
23 |
| -) |
| 20 | +from pandas.core.dtypes.common import is_bool |
24 | 21 | from pandas.core.dtypes.concat import concat_compat
|
25 | 22 | from pandas.core.dtypes.generic import (
|
26 | 23 | ABCDataFrame,
|
@@ -423,11 +420,12 @@ def __init__(
|
423 | 420 | self.ignore_index = ignore_index
|
424 | 421 | self.verify_integrity = verify_integrity
|
425 | 422 |
|
426 |
| - objs, keys = self._clean_keys_and_objs(objs, keys) |
| 423 | + objs, keys, ndims = _clean_keys_and_objs(objs, keys) |
427 | 424 |
|
428 |
| - # figure out what our result ndim is going to be |
429 |
| - ndims = self._get_ndims(objs) |
430 |
| - sample, objs = self._get_sample_object(objs, ndims, keys, names, levels) |
| 425 | + # select an object to be our result reference |
| 426 | + sample, objs = _get_sample_object( |
| 427 | + objs, ndims, keys, names, levels, self.intersect |
| 428 | + ) |
431 | 429 |
|
432 | 430 | # Standardize axis parameter to int
|
433 | 431 | if sample.ndim == 1:
|
@@ -458,100 +456,6 @@ def __init__(
|
458 | 456 | self.names = names or getattr(keys, "names", None)
|
459 | 457 | self.levels = levels
|
460 | 458 |
|
461 |
| - def _get_ndims(self, objs: list[Series | DataFrame]) -> set[int]: |
462 |
| - # figure out what our result ndim is going to be |
463 |
| - ndims = set() |
464 |
| - for obj in objs: |
465 |
| - if not isinstance(obj, (ABCSeries, ABCDataFrame)): |
466 |
| - msg = ( |
467 |
| - f"cannot concatenate object of type '{type(obj)}'; " |
468 |
| - "only Series and DataFrame objs are valid" |
469 |
| - ) |
470 |
| - raise TypeError(msg) |
471 |
| - |
472 |
| - ndims.add(obj.ndim) |
473 |
| - return ndims |
474 |
| - |
475 |
| - def _clean_keys_and_objs( |
476 |
| - self, |
477 |
| - objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame], |
478 |
| - keys, |
479 |
| - ) -> tuple[list[Series | DataFrame], Index | None]: |
480 |
| - if isinstance(objs, abc.Mapping): |
481 |
| - if keys is None: |
482 |
| - keys = list(objs.keys()) |
483 |
| - objs_list = [objs[k] for k in keys] |
484 |
| - else: |
485 |
| - objs_list = list(objs) |
486 |
| - |
487 |
| - if len(objs_list) == 0: |
488 |
| - raise ValueError("No objects to concatenate") |
489 |
| - |
490 |
| - if keys is None: |
491 |
| - objs_list = list(com.not_none(*objs_list)) |
492 |
| - else: |
493 |
| - # GH#1649 |
494 |
| - key_indices = [] |
495 |
| - clean_objs = [] |
496 |
| - if is_iterator(keys): |
497 |
| - keys = list(keys) |
498 |
| - if len(keys) != len(objs_list): |
499 |
| - # GH#43485 |
500 |
| - raise ValueError( |
501 |
| - f"The length of the keys ({len(keys)}) must match " |
502 |
| - f"the length of the objects to concatenate ({len(objs_list)})" |
503 |
| - ) |
504 |
| - for i, obj in enumerate(objs_list): |
505 |
| - if obj is not None: |
506 |
| - key_indices.append(i) |
507 |
| - clean_objs.append(obj) |
508 |
| - objs_list = clean_objs |
509 |
| - |
510 |
| - if not isinstance(keys, Index): |
511 |
| - keys = Index(keys) |
512 |
| - |
513 |
| - if len(key_indices) < len(keys): |
514 |
| - keys = keys.take(key_indices) |
515 |
| - |
516 |
| - if len(objs_list) == 0: |
517 |
| - raise ValueError("All objects passed were None") |
518 |
| - |
519 |
| - return objs_list, keys |
520 |
| - |
521 |
| - def _get_sample_object( |
522 |
| - self, |
523 |
| - objs: list[Series | DataFrame], |
524 |
| - ndims: set[int], |
525 |
| - keys, |
526 |
| - names, |
527 |
| - levels, |
528 |
| - ) -> tuple[Series | DataFrame, list[Series | DataFrame]]: |
529 |
| - # get the sample |
530 |
| - # want the highest ndim that we have, and must be non-empty |
531 |
| - # unless all objs are empty |
532 |
| - sample: Series | DataFrame | None = None |
533 |
| - if len(ndims) > 1: |
534 |
| - max_ndim = max(ndims) |
535 |
| - for obj in objs: |
536 |
| - if obj.ndim == max_ndim and np.sum(obj.shape): |
537 |
| - sample = obj |
538 |
| - break |
539 |
| - |
540 |
| - else: |
541 |
| - # filter out the empties if we have not multi-index possibilities |
542 |
| - # note to keep empty Series as it affect to result columns / name |
543 |
| - non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1] |
544 |
| - |
545 |
| - if len(non_empties) and ( |
546 |
| - keys is None and names is None and levels is None and not self.intersect |
547 |
| - ): |
548 |
| - objs = non_empties |
549 |
| - sample = objs[0] |
550 |
| - |
551 |
| - if sample is None: |
552 |
| - sample = objs[0] |
553 |
| - return sample, objs |
554 |
| - |
555 | 459 | def _sanitize_mixed_ndim(
|
556 | 460 | self,
|
557 | 461 | objs: list[Series | DataFrame],
|
@@ -664,29 +568,24 @@ def get_result(self):
|
664 | 568 | out = sample._constructor_from_mgr(new_data, axes=new_data.axes)
|
665 | 569 | return out.__finalize__(self, method="concat")
|
666 | 570 |
|
667 |
| - def _get_result_dim(self) -> int: |
668 |
| - if self._is_series and self.bm_axis == 1: |
669 |
| - return 2 |
670 |
| - else: |
671 |
| - return self.objs[0].ndim |
672 |
| - |
@cache_readonly
def new_axes(self) -> list[Index]:
    """
    Construct the axes of the concatenation result.

    Returns
    -------
    list[Index]
        One Index per result dimension: the concatenation axis itself at
        position ``self.bm_axis``, and the combined (union or intersection)
        axis of all objs elsewhere.
    """
    # A Series concatenated along axis=1 produces a 2-D (DataFrame) result.
    if self._is_series and self.bm_axis == 1:
        ndim = 2
    else:
        ndim = self.objs[0].ndim

    axes: list[Index] = []
    for i in range(ndim):
        if i == self.bm_axis:
            # The axis along which we are concatenating.
            axes.append(self._get_concat_axis)
        else:
            # Every other axis is combined across all objects, honoring
            # the join (intersect) and sort options.
            axes.append(
                get_objs_combined_axis(
                    self.objs,
                    axis=self.objs[0]._get_block_manager_axis(i),
                    intersect=self.intersect,
                    sort=self.sort,
                )
            )
    return axes
|
680 | 588 |
|
681 |
| - def _get_comb_axis(self, i: AxisInt) -> Index: |
682 |
| - data_axis = self.objs[0]._get_block_manager_axis(i) |
683 |
| - return get_objs_combined_axis( |
684 |
| - self.objs, |
685 |
| - axis=data_axis, |
686 |
| - intersect=self.intersect, |
687 |
| - sort=self.sort, |
688 |
| - ) |
689 |
| - |
690 | 589 | @cache_readonly
|
691 | 590 | def _get_concat_axis(self) -> Index:
|
692 | 591 | """
|
@@ -747,6 +646,98 @@ def _maybe_check_integrity(self, concat_index: Index) -> None:
|
747 | 646 | raise ValueError(f"Indexes have overlapping values: {overlap}")
|
748 | 647 |
|
749 | 648 |
|
| 649 | +def _clean_keys_and_objs( |
| 650 | + objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame], |
| 651 | + keys, |
| 652 | +) -> tuple[list[Series | DataFrame], Index | None, set[int]]: |
| 653 | + """ |
| 654 | + Returns |
| 655 | + ------- |
| 656 | + clean_objs : list[Series | DataFrame] |
| 657 | + LIst of DataFrame and Series with Nones removed. |
| 658 | + keys : Index | None |
| 659 | + None if keys was None |
| 660 | + Index if objs was a Mapping or keys was not None. Filtered where objs was None. |
| 661 | + ndim : set[int] |
| 662 | + Unique .ndim attribute of obj encountered. |
| 663 | + """ |
| 664 | + if isinstance(objs, abc.Mapping): |
| 665 | + if keys is None: |
| 666 | + keys = objs.keys() |
| 667 | + objs_list = [objs[k] for k in keys] |
| 668 | + else: |
| 669 | + objs_list = list(objs) |
| 670 | + |
| 671 | + if len(objs_list) == 0: |
| 672 | + raise ValueError("No objects to concatenate") |
| 673 | + |
| 674 | + if keys is not None: |
| 675 | + if not isinstance(keys, Index): |
| 676 | + keys = Index(keys) |
| 677 | + if len(keys) != len(objs_list): |
| 678 | + # GH#43485 |
| 679 | + raise ValueError( |
| 680 | + f"The length of the keys ({len(keys)}) must match " |
| 681 | + f"the length of the objects to concatenate ({len(objs_list)})" |
| 682 | + ) |
| 683 | + |
| 684 | + # GH#1649 |
| 685 | + key_indices = [] |
| 686 | + clean_objs = [] |
| 687 | + ndims = set() |
| 688 | + for i, obj in enumerate(objs_list): |
| 689 | + if obj is None: |
| 690 | + continue |
| 691 | + elif isinstance(obj, (ABCSeries, ABCDataFrame)): |
| 692 | + key_indices.append(i) |
| 693 | + clean_objs.append(obj) |
| 694 | + ndims.add(obj.ndim) |
| 695 | + else: |
| 696 | + msg = ( |
| 697 | + f"cannot concatenate object of type '{type(obj)}'; " |
| 698 | + "only Series and DataFrame objs are valid" |
| 699 | + ) |
| 700 | + raise TypeError(msg) |
| 701 | + |
| 702 | + if keys is not None and len(key_indices) < len(keys): |
| 703 | + keys = keys.take(key_indices) |
| 704 | + |
| 705 | + if len(clean_objs) == 0: |
| 706 | + raise ValueError("All objects passed were None") |
| 707 | + |
| 708 | + return clean_objs, keys, ndims |
| 709 | + |
| 710 | + |
| 711 | +def _get_sample_object( |
| 712 | + objs: list[Series | DataFrame], |
| 713 | + ndims: set[int], |
| 714 | + keys, |
| 715 | + names, |
| 716 | + levels, |
| 717 | + intersect: bool, |
| 718 | +) -> tuple[Series | DataFrame, list[Series | DataFrame]]: |
| 719 | + # get the sample |
| 720 | + # want the highest ndim that we have, and must be non-empty |
| 721 | + # unless all objs are empty |
| 722 | + if len(ndims) > 1: |
| 723 | + max_ndim = max(ndims) |
| 724 | + for obj in objs: |
| 725 | + if obj.ndim == max_ndim and sum(obj.shape): # type: ignore[arg-type] |
| 726 | + return obj, objs |
| 727 | + elif keys is None and names is None and levels is None and not intersect: |
| 728 | + # filter out the empties if we have not multi-index possibilities |
| 729 | + # note to keep empty Series as it affect to result columns / name |
| 730 | + if ndims.pop() == 2: |
| 731 | + non_empties = [obj for obj in objs if sum(obj.shape)] |
| 732 | + else: |
| 733 | + non_empties = objs |
| 734 | + |
| 735 | + if len(non_empties): |
| 736 | + return non_empties[0], non_empties |
| 737 | + |
| 738 | + return objs[0], objs |
| 739 | + |
| 740 | + |
750 | 741 | def _concat_indexes(indexes) -> Index:
|
751 | 742 | return indexes[0].append(indexes[1:])
|
752 | 743 |
|
|
0 commit comments