Skip to content

Commit 0a1cf4a

Browse files
committed
Add option to select smart diffing for migrations to catalog compile
This commit adds a new option `-m`, `--migration-diff-output` which can be used to select smarter diffing for a migration which Commodore knows about. This commit also adds the Kapitan 0.29 to 0.30 migration as the only known migration. The smart diffing for this migration ignores reordered K8s objects in multi-document YAML streams, and suppresses diffs which only contain changes `app.kubernetes.io/managed-by: Tiller` -> `app.kubernetes.io/managed-by: Helm` and `heritage: Tiller` -> `heritage: Helm`.
1 parent af54bad commit 0a1cf4a

File tree

5 files changed

+202
-37
lines changed

5 files changed

+202
-37
lines changed

commodore/catalog.py

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1+
import difflib
2+
13
from pathlib import Path as P
2-
from typing import Iterable
4+
from typing import Iterable, Tuple
35

46
import click
7+
import yaml
58

69
from . import git
710
from .helpers import rm_tree_contents, lieutenant_query
811
from .cluster import Cluster
9-
from .config import Config
12+
from .config import Config, Migration
13+
from .k8sobject import K8sObject
1014

1115

1216
def fetch_customer_catalog(config: Config, cluster: Cluster):
@@ -112,6 +116,64 @@ def _push_catalog(cfg: Config, repo, commit_message):
112116
click.echo(" > Skipping commit+push to catalog in local mode...")
113117

114118

119+
def _ignore_kapitan_029_030_non_semantic(line):
120+
"""
121+
Ignore non-semantic changes in the (already sorted by K8s object) diff between
122+
Kapitan 0.29 and 0.30.
123+
124+
This includes:
125+
* Change of "app.kubernetes.io/managed-by: Tiller" to
126+
"app.kubernetes.io/managed-by: Helm"
127+
* Change of "heritage: Tiller" to "heritage: Helm"
128+
* `null` objects not emitted in multi-object YAML documents anymore
129+
"""
130+
line = line.strip()
131+
# We don't care about context and metadata lines
132+
if not (line.startswith("-") or line.startswith("+")):
133+
return True
134+
# Ignore changes where we don't emit a null object preceded or followed
135+
# by a stream separator anymore
136+
if line in ("-null", "----"):
137+
return True
138+
# Ignore changes which are only about Tiller -> Helm as object manager
139+
if line.startswith("-") and (
140+
line.endswith("app.kubernetes.io/managed-by: Tiller")
141+
or line.endswith("heritage: Tiller")
142+
):
143+
return True
144+
if line.startswith("+") and (
145+
line.endswith("app.kubernetes.io/managed-by: Helm")
146+
or line.endswith("heritage: Helm")
147+
):
148+
return True
149+
150+
return False
151+
152+
153+
def _kapitan_029_030_difffunc(
154+
before_text: str, after_text: str, fromfile: str = "", tofile: str = ""
155+
) -> Tuple[Iterable[str], bool]:
156+
before_objs = sorted(yaml.safe_load_all(before_text), key=K8sObject)
157+
before_sorted_lines = yaml.dump_all(before_objs).split("\n")
158+
159+
after_objs = sorted(yaml.safe_load_all(after_text), key=K8sObject)
160+
after_sorted_lines = yaml.dump_all(after_objs).split("\n")
161+
162+
diff = difflib.unified_diff(
163+
before_sorted_lines,
164+
after_sorted_lines,
165+
lineterm="",
166+
fromfile=fromfile,
167+
tofile=tofile,
168+
)
169+
diff_lines = list(diff)
170+
suppress_diff = all(
171+
_ignore_kapitan_029_030_non_semantic(line) for line in diff_lines[2:]
172+
)
173+
174+
return diff_lines, suppress_diff
175+
176+
115177
def update_catalog(cfg: Config, targets: Iterable[str], repo):
116178
click.secho("Updating catalog repository...", bold=True)
117179
# pylint: disable=import-outside-toplevel
@@ -122,7 +184,11 @@ def update_catalog(cfg: Config, targets: Iterable[str], repo):
122184
for target_name in targets:
123185
dir_util.copy_tree(str(cfg.inventory.output_dir / target_name), str(catalogdir))
124186

125-
difftext, changed = git.stage_all(repo)
187+
if cfg.migration == Migration.KAP_029_030:
188+
difftext, changed = git.stage_all(repo, diff_func=_kapitan_029_030_difffunc)
189+
else:
190+
difftext, changed = git.stage_all(repo)
191+
126192
if changed:
127193
indented = textwrap.indent(difftext, " ")
128194
message = f" > Changes:\n{indented}"

commodore/cli.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from dotenv import load_dotenv, find_dotenv
66
from commodore import __git_version__, __version__
77
from .catalog import catalog_list
8-
from .config import Config
8+
from .config import Config, Migration
99
from .helpers import clean_working_tree
1010
from .compile import compile as _compile
1111
from .component.template import ComponentTemplater
@@ -130,6 +130,18 @@ def clean(config: Config, verbose):
130130
default=True,
131131
help="Whether to fetch Jsonnet and Kapitan dependencies in local mode. By default dependencies are fetched.",
132132
)
133+
@click.option(
134+
"-m",
135+
"--migration-diff-output",
136+
help=(
137+
"Specify a migration that you expect to happen for the cluster catalog. "
138+
+ "Currently Commodore only knows the Kapitan 0.29 to 0.30 migration. "
139+
+ "When the Kapitan 0.29 to 0.30 migration is selected, Commodore will suppress "
140+
+ "noise (changing managed-by labels, and reordered objects) caused by the "
141+
+ "migration in the diff output."
142+
),
143+
type=click.Choice([m.value for m in Migration], case_sensitive=False),
144+
)
133145
@verbosity
134146
@pass_config
135147
# pylint: disable=too-many-arguments
@@ -147,6 +159,7 @@ def compile_catalog(
147159
global_repo_revision_override,
148160
tenant_repo_revision_override,
149161
fetch_dependencies,
162+
migration_diff_output,
150163
):
151164
config.update_verbosity(verbose)
152165
config.api_url = api_url
@@ -158,6 +171,7 @@ def compile_catalog(
158171
config.usermail = git_author_email
159172
config.global_repo_revision_override = global_repo_revision_override
160173
config.tenant_repo_revision_override = tenant_repo_revision_override
174+
config.migration = migration_diff_output
161175
if config.push and (
162176
config.global_repo_revision_override or config.tenant_repo_revision_override
163177
):

commodore/config.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import textwrap
22

3+
from enum import Enum
34
from pathlib import Path as P
4-
from typing import Dict, List
5+
from typing import Dict, List, Optional
56

67
import click
78
from git import Repo
@@ -10,13 +11,18 @@
1011
from .inventory import Inventory
1112

1213

14+
class Migration(Enum):
15+
KAP_029_030 = "kapitan-0.29-to-0.30"
16+
17+
1318
# pylint: disable=too-many-instance-attributes,too-many-public-methods
1419
class Config:
1520
_inventory: Inventory
1621
_components: Dict[str, Component]
1722
_config_repos: Dict[str, Repo]
1823
_component_aliases: Dict[str, str]
1924
_deprecation_notices: List[str]
25+
_migration: Optional[Migration]
2026

2127
# pylint: disable=too-many-arguments
2228
def __init__(
@@ -46,6 +52,7 @@ def __init__(
4652
self._deprecation_notices = []
4753
self._global_repo_revision_override = None
4854
self._tenant_repo_revision_override = None
55+
self._migration = None
4956

5057
@property
5158
def verbose(self):
@@ -124,6 +131,15 @@ def tenant_repo_revision_override(self):
124131
def tenant_repo_revision_override(self, rev):
125132
self._tenant_repo_revision_override = rev
126133

134+
@property
135+
def migration(self):
136+
return self._migration
137+
138+
@migration.setter
139+
def migration(self, migration):
140+
if migration and migration != "":
141+
self._migration = Migration(migration)
142+
127143
@property
128144
def inventory(self):
129145
return self._inventory

commodore/git.py

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import difflib
22
import hashlib
33

4+
from typing import Iterable, List, Tuple
5+
from typing_extensions import Protocol
6+
47
import click
58

69
from git import Repo, Actor
@@ -126,7 +129,63 @@ def _compute_similarity(change):
126129
return similarity_diff
127130

128131

129-
def stage_all(repo):
132+
class DiffFunc(Protocol):
133+
def __call__(
134+
self, before_text: str, after_text: str, fromfile: str = "", tofile: str = ""
135+
) -> Tuple[Iterable[str], bool]:
136+
...
137+
138+
139+
def _default_difffunc(
140+
before_text: str, after_text: str, fromfile: str = "", tofile: str = ""
141+
) -> Tuple[Iterable[str], bool]:
142+
before_lines = before_text.split("\n")
143+
after_lines = after_text.split("\n")
144+
diff_lines = difflib.unified_diff(
145+
before_lines, after_lines, lineterm="", fromfile=fromfile, tofile=tofile
146+
)
147+
# never suppress diffs in default difffunc
148+
return diff_lines, False
149+
150+
151+
def _process_diffs(
152+
change_type: str, changes: Iterable, diff_func: DiffFunc
153+
) -> Iterable[str]:
154+
difftext = []
155+
for c in changes:
156+
# Because we're diffing the staged changes, the diff objects
157+
# are backwards, and "added" files are actually being deleted
158+
# and vice versa for "deleted" files.
159+
if change_type == "A":
160+
difftext.append(click.style(f"Deleted file {c.b_path}", fg="red"))
161+
elif change_type == "D":
162+
difftext.append(click.style(f"Added file {c.b_path}", fg="green"))
163+
elif change_type == "R":
164+
difftext.append(
165+
click.style(f"Renamed file {c.b_path} => {c.a_path}", fg="yellow")
166+
)
167+
else:
168+
# Other changes should produce a usable diff
169+
# The diff objects are backwards, so use b_blob as before
170+
# and a_blob as after.
171+
before = c.b_blob.data_stream.read().decode("utf-8")
172+
after = c.a_blob.data_stream.read().decode("utf-8")
173+
diff_lines, suppress_diff = diff_func(
174+
before, after, fromfile=c.b_path, tofile=c.a_path
175+
)
176+
if not suppress_diff:
177+
if c.renamed_file:
178+
# Just compute similarity ratio for renamed files
179+
# similar to git's diffing
180+
difftext.append("\n".join(_compute_similarity(c)).strip())
181+
continue
182+
diff_lines = [_colorize_diff(line) for line in diff_lines]
183+
difftext.append("\n".join(diff_lines).strip())
184+
185+
return difftext
186+
187+
188+
def stage_all(repo, diff_func: DiffFunc = _default_difffunc):
130189
index = repo.index
131190

132191
# Stage deletions
@@ -149,40 +208,12 @@ def stage_all(repo):
149208
diff = index.diff(_NULL_TREE(repo))
150209

151210
changed = False
152-
difftext = []
211+
difftext: List[str] = []
153212
if diff:
154213
changed = True
155214
for ct in diff.change_type:
156-
for c in diff.iter_change_type(ct):
157-
# Because we're diffing the staged changes, the diff objects
158-
# are backwards, and "added" files are actually being deleted
159-
# and vice versa for "deleted" files.
160-
if ct == "A":
161-
difftext.append(click.style(f"Deleted file {c.b_path}", fg="red"))
162-
elif ct == "D":
163-
difftext.append(click.style(f"Added file {c.b_path}", fg="green"))
164-
elif ct == "R":
165-
difftext.append(
166-
click.style(
167-
f"Renamed file {c.b_path} => {c.a_path}", fg="yellow"
168-
)
169-
)
170-
else:
171-
if c.renamed_file:
172-
# Just compute similarity ratio for renamed files
173-
# similar to git's diffing
174-
difftext.append("\n".join(_compute_similarity(c)).strip())
175-
continue
176-
# Other changes should produce a usable diff
177-
# The diff objects are backwards, so use b_blob as before
178-
# and a_blob as after.
179-
before = c.b_blob.data_stream.read().decode("utf-8").split("\n")
180-
after = c.a_blob.data_stream.read().decode("utf-8").split("\n")
181-
diff_lines = difflib.unified_diff(
182-
before, after, lineterm="", fromfile=c.b_path, tofile=c.a_path
183-
)
184-
diff_lines = [_colorize_diff(line) for line in diff_lines]
185-
difftext.append("\n".join(diff_lines).strip())
215+
ct_difftext = _process_diffs(ct, diff.iter_change_type(ct), diff_func)
216+
difftext.extend(ct_difftext)
186217

187218
return "\n".join(difftext), changed
188219

commodore/k8sobject.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
class K8sObject:
2+
def __init__(self, obj):
3+
self._obj = {}
4+
if obj:
5+
self._obj = obj
6+
self._kind = self._obj.get("kind", "")
7+
self._name = self._obj.get("metadata", {}).get("name", "")
8+
self._namespace = self._obj.get("metadata", {}).get("namespace", "")
9+
10+
def __lt__(self, other):
11+
if self._kind != other._kind:
12+
return self._kind < other._kind
13+
if self._namespace != other._namespace:
14+
return self._namespace < other._namespace
15+
return self._name < other._name
16+
17+
def __gt__(self, other):
18+
if self._kind != other._kind:
19+
return self._kind > other._kind
20+
if self._namespace != other._namespace:
21+
return self._namespace > other._namespace
22+
return self._name > other._name
23+
24+
def __eq__(self, other):
25+
return (
26+
self._kind == other._kind
27+
and self._namespace == other._namespace
28+
and self._name == other._name
29+
)
30+
31+
def __le__(self, other):
32+
return not self.__gt__(other)
33+
34+
def __ge__(self, other):
35+
return not self.__lt__(other)
36+
37+
def __ne__(self, other):
38+
return not self.__eq__(other)

0 commit comments

Comments
 (0)