Skip to content

Use PathElementSet and similar to index PathElement #114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions fieldpath/element.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ type PathElementSet struct {
members sortedPathElements
}

func MakePathElementSet(size int) PathElementSet {
return PathElementSet{
members: make(sortedPathElements, 0, size),
}
}

type sortedPathElements []PathElement

// Implement the sort interface; this would permit bulk creation, which would
Expand Down
85 changes: 85 additions & 0 deletions fieldpath/pathelementmap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fieldpath

import (
"sort"

"sigs.k8s.io/structured-merge-diff/value"
)

// PathElementValueMap is a map from PathElement to value.Value.
//
// TODO(apelisse): We have multiple very similar implementation of this
// for PathElementSet and SetNodeMap, so we could probably share the
// code.
type PathElementValueMap struct {
members sortedPathElementValues
}

func MakePathElementValueMap(size int) PathElementValueMap {
return PathElementValueMap{
members: make(sortedPathElementValues, 0, size),
}
}

type pathElementValue struct {
PathElement PathElement
Value value.Value
}

type sortedPathElementValues []pathElementValue

// Implement the sort interface; this would permit bulk creation, which would
// be faster than doing it one at a time via Insert.
func (spev sortedPathElementValues) Len() int { return len(spev) }
func (spev sortedPathElementValues) Less(i, j int) bool {
return spev[i].PathElement.Less(spev[j].PathElement)
}
func (spev sortedPathElementValues) Swap(i, j int) { spev[i], spev[j] = spev[j], spev[i] }

// Insert adds the pathelement and associated value in the map.
func (s *PathElementValueMap) Insert(pe PathElement, v value.Value) {
loc := sort.Search(len(s.members), func(i int) bool {
return !s.members[i].PathElement.Less(pe)
})
if loc == len(s.members) {
s.members = append(s.members, pathElementValue{pe, v})
return
}
if s.members[loc].PathElement.Equals(pe) {
return
}
s.members = append(s.members, pathElementValue{})
copy(s.members[loc+1:], s.members[loc:])
s.members[loc] = pathElementValue{pe, v}
}

// Get retrieves the value associated with the given PathElement from the map.
// (nil, false) is returned if there is no such PathElement.
func (s *PathElementValueMap) Get(pe PathElement) (value.Value, bool) {
loc := sort.Search(len(s.members), func(i int) bool {
return !s.members[i].PathElement.Less(pe)
})
if loc == len(s.members) {
return value.Value{}, false
}
if s.members[loc].PathElement.Equals(pe) {
return s.members[loc].Value, true
}
return value.Value{}, false
}
50 changes: 50 additions & 0 deletions fieldpath/pathelementmap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fieldpath

import (
"testing"

"sigs.k8s.io/structured-merge-diff/value"
)

func TestPathElementValueMap(t *testing.T) {
m := PathElementValueMap{}

if _, ok := m.Get(PathElement{FieldName: strptr("onion")}); ok {
t.Fatal("Unexpected path-element found in empty map")
}

m.Insert(PathElement{FieldName: strptr("carrot")}, value.StringValue("knife"))
m.Insert(PathElement{FieldName: strptr("chive")}, value.IntValue(2))

if _, ok := m.Get(PathElement{FieldName: strptr("onion")}); ok {
t.Fatal("Unexpected path-element in map")
}

if val, ok := m.Get(PathElement{FieldName: strptr("carrot")}); !ok {
t.Fatal("Missing path-element in map")
} else if !val.Equals(value.StringValue("knife")) {
t.Fatalf("Unexpected value found: %#v", val)
}

if val, ok := m.Get(PathElement{FieldName: strptr("chive")}); !ok {
t.Fatal("Missing path-element in map")
} else if !val.Equals(value.IntValue(2)) {
t.Fatalf("Unexpected value found: %#v", val)
}
}
46 changes: 23 additions & 23 deletions typed/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,9 @@ func (w *mergingWalker) visitListItems(t *schema.List, lhs, rhs *value.List) (er
rhsOrder := []fieldpath.PathElement{}

// First, collect all RHS children.
observedRHS := map[string]value.Value{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised I missed these uses of strings when I was working on this last quarter...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. We had much bigger things to improve,
  2. Maybe because of the test data?!

Anyways, /shrug

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is a fairly small improvement compared to some of the things that are coming.

var observedRHS fieldpath.PathElementValueMap
if rhs != nil {
observedRHS = fieldpath.MakePathElementValueMap(len(rhs.Items))
for i, child := range rhs.Items {
pe, err := listItemToPathElement(t, i, child)
if err != nil {
Expand All @@ -179,18 +180,18 @@ func (w *mergingWalker) visitListItems(t *schema.List, lhs, rhs *value.List) (er
// this element.
continue
}
keyStr := pe.String()
if _, found := observedRHS[keyStr]; found {
errs = append(errs, w.errorf("rhs: duplicate entries for key %v", keyStr)...)
if _, ok := observedRHS.Get(pe); ok {
errs = append(errs, w.errorf("rhs: duplicate entries for key %v", pe.String())...)
}
observedRHS[keyStr] = child
observedRHS.Insert(pe, child)
rhsOrder = append(rhsOrder, pe)
}
}

// Then merge with LHS children.
observedLHS := map[string]struct{}{}
var observedLHS fieldpath.PathElementSet
if lhs != nil {
observedLHS = fieldpath.MakePathElementSet(len(lhs.Items))
for i, child := range lhs.Items {
pe, err := listItemToPathElement(t, i, child)
if err != nil {
Expand All @@ -200,15 +201,14 @@ func (w *mergingWalker) visitListItems(t *schema.List, lhs, rhs *value.List) (er
// this element.
continue
}
keyStr := pe.String()
if _, found := observedLHS[keyStr]; found {
errs = append(errs, w.errorf("lhs: duplicate entries for key %v", keyStr)...)
if observedLHS.Has(pe) {
errs = append(errs, w.errorf("lhs: duplicate entries for key %v", pe.String())...)
continue
}
observedLHS[keyStr] = struct{}{}
observedLHS.Insert(pe)
w2 := w.prepareDescent(pe, t.ElementType)
w2.lhs = &child
if rchild, ok := observedRHS[keyStr]; ok {
if rchild, ok := observedRHS.Get(pe); ok {
w2.rhs = &rchild
}
if newErrs := w2.merge(); len(newErrs) > 0 {
Expand All @@ -217,22 +217,22 @@ func (w *mergingWalker) visitListItems(t *schema.List, lhs, rhs *value.List) (er
out.Items = append(out.Items, *w2.out)
}
w.finishDescent(w2)
// Keep track of children that have been handled
delete(observedRHS, keyStr)
}
}

for _, rhsToCheck := range rhsOrder {
if unmergedChild, ok := observedRHS[rhsToCheck.String()]; ok {
w2 := w.prepareDescent(rhsToCheck, t.ElementType)
w2.rhs = &unmergedChild
if newErrs := w2.merge(); len(newErrs) > 0 {
errs = append(errs, newErrs...)
} else if w2.out != nil {
out.Items = append(out.Items, *w2.out)
}
w.finishDescent(w2)
for _, pe := range rhsOrder {
if observedLHS.Has(pe) {
continue
}
value, _ := observedRHS.Get(pe)
w2 := w.prepareDescent(pe, t.ElementType)
w2.rhs = &value
if newErrs := w2.merge(); len(newErrs) > 0 {
errs = append(errs, newErrs...)
} else if w2.out != nil {
out.Items = append(out.Items, *w2.out)
}
w.finishDescent(w2)
}

if len(out.Items) > 0 {
Expand Down
9 changes: 4 additions & 5 deletions typed/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (v *validatingObjectWalker) doScalar(t *schema.Scalar) ValidationErrors {
}

func (v *validatingObjectWalker) visitListItems(t *schema.List, list *value.List) (errs ValidationErrors) {
observedKeys := map[string]struct{}{}
observedKeys := fieldpath.MakePathElementSet(len(list.Items))
for i, child := range list.Items {
pe, err := listItemToPathElement(t, i, child)
if err != nil {
Expand All @@ -152,11 +152,10 @@ func (v *validatingObjectWalker) visitListItems(t *schema.List, list *value.List
// this element.
continue
}
keyStr := pe.String()
if _, found := observedKeys[keyStr]; found {
errs = append(errs, v.errorf("duplicate entries for key %v", keyStr)...)
if observedKeys.Has(pe) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really faster? i know the benchmarks are better but it makes the constant time operation with something that has to sort a slice and do a binary search

https://github.com/kubernetes-sigs/structured-merge-diff/blob/master/fieldpath/element.go#L243-L255

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @lavalamp

It is slightly faster but most importantly it's more correct. Using the string as the index is a little wrong since it's not 100% canonicalized and is not meant to be used for "production use".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allocating a buffer for the string, converting the pathelement to a string, hashing the string, comparing the strings, is quite expensive too

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

String() was actually written to make this work, so this should be equally correct.

The set impl is O(log n) but with a low enough constant factor that it was faster than the O(1) thing it replaced. Now we don't have a O(1) thing to compare it with, I assume at some size it would stop being faster.

errs = append(errs, v.errorf("duplicate entries for key %v", pe.String())...)
}
observedKeys[keyStr] = struct{}{}
observedKeys.Insert(pe)
v2 := v.prepareDescent(pe, t.ElementType)
v2.value = child
errs = append(errs, v2.validate()...)
Expand Down