From 3657aedac7b7af82fbdd896da85c0b4be282318d Mon Sep 17 00:00:00 2001 From: dharni0607 Date: Mon, 1 Jul 2019 17:06:06 +0530 Subject: [PATCH 1/3] add ons in string directory - Bayer_Moore_Search --- strings/Boyer_Moore_Search.py | 74 +++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 strings/Boyer_Moore_Search.py diff --git a/strings/Boyer_Moore_Search.py b/strings/Boyer_Moore_Search.py new file mode 100644 index 000000000000..dfbf0cb2e883 --- /dev/null +++ b/strings/Boyer_Moore_Search.py @@ -0,0 +1,74 @@ +""" +This algorithm tries to find the pattern in given text using Bad Character Heuristic method. +The bad-character rule considers the character in Text at which there is a mis-match. The next occurrence of that character to the left in Pattern is found, and a shift which brings that occurrence in line with the mismatched occurrence in Text is proposed. If the mismatched character does not occur to the left in Pattern, a shift is proposed that moves the entirety of Pattern past the point of mismatch + +Complexity : O(n/m) + n=length of main string + m=length of pattern string +""" + +class BoyerMooreSearch: + def __init__(self, text, pattern): + self.text, self.pattern = text, pattern + self.textLen, self.patLen = len(text), len(pattern) + + def match_In_Pattern(self, char): + + """ finds the index of char in pattern in reverse order + + Paremeters : + char (chr): character to be searched + + Returns : + i (int): index of char from last in pattern + -1 (int): if char is not found in pattern + """ + + for i in range(self.patLen-1, -1, -1): + if char == self.pattern[i]: + return i + return -1 + + + def misMatch_In_Text(self, currentPos): + + """ finds the index of mis-matched character in text when compared with pattern from last + + Paremeters : + currentPos (int): current index position of text + + Returns : + i (int): index of mismatched char from last in text + -1 (int): if there is no mis-match between pattern and text 
block + """ + + for i in range(self.patLen-1, -1, -1): + if self.pattern[i] != self.text[currentPos + i]: + return currentPos + i + return -1 + + def bad_Character_Heuristic(self): + + """ searches pattern in text and returns index positions """ + positions = [] + for i in range(self.textLen - self.patLen + 1): + misMatch_Index = self.misMatch_In_Text(i) + if misMatch_Index == -1: + positions.append(i) + else: + match_Index = self.match_In_Pattern(self.text[misMatch_Index]) + i = misMatch_Index - match_Index #shifting index + return positions + + +text = "ABAABA" +pattern = "AB" +bms = BoyerMooreSearch(text, pattern) +positions = bms.bad_Character_Heuristic() +if len(positions) == 0: + print("No match found") +else: + print("Pattern found in following positions: ") + print(positions) + + From cfbbb7b92c28d9a04f4507e357f775521a850809 Mon Sep 17 00:00:00 2001 From: dharni0607 Date: Tue, 2 Jul 2019 16:00:17 +0530 Subject: [PATCH 2/3] created divide_and_conquer folder and added max_sub_array_sum.py under it (issue #817) --- divide_and_conquer/closest_pair_of_points.py | 67 ++++++++++++++++++ divide_and_conquer/max_sub_array_sum.py | 72 +++++++++++++++++++ strings/Boyer_Moore_Search.py | 74 -------------------- 3 files changed, 139 insertions(+), 74 deletions(-) create mode 100644 divide_and_conquer/closest_pair_of_points.py create mode 100644 divide_and_conquer/max_sub_array_sum.py delete mode 100644 strings/Boyer_Moore_Search.py diff --git a/divide_and_conquer/closest_pair_of_points.py b/divide_and_conquer/closest_pair_of_points.py new file mode 100644 index 000000000000..f06abce95ffa --- /dev/null +++ b/divide_and_conquer/closest_pair_of_points.py @@ -0,0 +1,67 @@ +""" +The algorithm finds distance between closest pair of points in the given n points. +Approach: Divide and conquer +The points are sorted based on x-cords +& by applying divide and conquer approach, +minimum distance is obtained recursively. 
+ +Edge case: closest points lie on different sides of partition +This case handled by forming a strip of points +which are at a distance (< closest_pair_dis) from mid-point. +(It is a proven that strip contains at most 6 points) +And brute force method is applied on strip to find closest points. + +Time complexity: O(n * (logn) ^ 2) +""" + + +import math + + +def euclidean_distance(point1, point2): + return math.sqrt(pow(point1[0] - point2[0], 2) + pow(point1[1] - point2[1], 2)) + + +def column_based_sort(array, column = 0): + return sorted(array, key = lambda x: x[column]) + + +#brute force approach to find distance between closest pair points +def dis_btw_closest_pair(points, no_of_points, min_dis = float("inf")): + for i in range(no_of_points - 1): + for j in range(i+1, no_of_points): + current_dis = euclidean_distance(points[i], points[j]) + if current_dis < min_dis: + min_dis = current_dis + return min_dis + + +#divide and conquer approach +def closest_pair_of_points(points, no_of_points): + # base case + if no_of_points <= 3: + return dis_btw_closest_pair(points, no_of_points) + + #recursion + mid = no_of_points//2 + closest_in_left = closest_pair_of_points(points[:mid], mid) + closest_in_right = closest_pair_of_points(points[mid:], no_of_points - mid) + closest_pair_dis = min(closest_in_left, closest_in_right) + + #points which are at a distance (< closest_pair_dis) from mid-point + cross_strip = [] + for point in points: + if abs(point[0] - points[mid][0]) < closest_pair_dis: + cross_strip.append(point) + + cross_strip = column_based_sort(cross_strip, 1) + closest_in_strip = dis_btw_closest_pair(cross_strip, + len(cross_strip), closest_pair_dis) + return min(closest_pair_dis, closest_in_strip) + + +points = [[2, 3], [12, 30], [40, 50], [5, 1], [12, 10]] +points = column_based_sort(points) +print("Distance:", closest_pair_of_points(points, len(points))) + + diff --git a/divide_and_conquer/max_sub_array_sum.py b/divide_and_conquer/max_sub_array_sum.py new 
file mode 100644 index 000000000000..531a45abca6f --- /dev/null +++ b/divide_and_conquer/max_sub_array_sum.py @@ -0,0 +1,72 @@ +""" +Given a array of length n, max_sub_array_sum() finds the maximum of sum of contiguous sub-array using divide and conquer method. + +Time complexity : O(n log n) + +Ref : INTRODUCTION TO ALGORITHMS THIRD EDITION (section : 4, sub-section : 4.1, page : 70) + +""" + + +def max_sum_from_start(array): + """ This function finds the maximum contiguous sum of array from 0 index + + Parameters : + array (list[int]) : given array + + Returns : + max_sum (int) : maximum contiguous sum of array from 0 index + + """ + array_sum = 0 + max_sum = float("-inf") + for num in array: + array_sum += num + if array_sum > max_sum: + max_sum = array_sum + return max_sum + + +def max_cross_array_sum(array, left, mid, right): + """ This function finds the maximum contiguous sum of left and right arrays + + Parameters : + array, left, mid, right (list[int], int, int, int) + + Returns : + (int) : maximum of sum of contiguous sum of left and right arrays + + """ + + max_sum_of_left = max_sum_from_start(array[left:mid+1][::-1]) + max_sum_of_right = max_sum_from_start(array[mid+1: right+1]) + return max_sum_of_left + max_sum_of_right + + +def max_sub_array_sum(array, left, right): + """ This function finds the maximum of sum of contiguous sub-array using divide and conquer method + + Parameters : + array, left, right (list[int], int, int) : given array, current left index and current right index + + Returns : + int : maximum of sum of contiguous sub-array + + """ + + # base case: array has only one element + if left == right: + return array[right] + + # Recursion + mid = (left + right) // 2 + left_half_sum = max_sub_array_sum(array, left, mid) + right_half_sum = max_sub_array_sum(array, mid + 1, right) + cross_sum = max_cross_array_sum(array, left, mid, right) + return max(left_half_sum, right_half_sum, cross_sum) + + +array = [-2, -5, 6, -2, -3, 1, 5, -6] 
+array_length = len(array) +print("Maximum sum of contiguous subarray:", max_sub_array_sum(array, 0, array_length - 1)) + diff --git a/strings/Boyer_Moore_Search.py b/strings/Boyer_Moore_Search.py deleted file mode 100644 index dfbf0cb2e883..000000000000 --- a/strings/Boyer_Moore_Search.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -This algorithm tries to find the pattern in given text using Bad Character Heuristic method. -The bad-character rule considers the character in Text at which there is a mis-match. The next occurrence of that character to the left in Pattern is found, and a shift which brings that occurrence in line with the mismatched occurrence in Text is proposed. If the mismatched character does not occur to the left in Pattern, a shift is proposed that moves the entirety of Pattern past the point of mismatch - -Complexity : O(n/m) - n=length of main string - m=length of pattern string -""" - -class BoyerMooreSearch: - def __init__(self, text, pattern): - self.text, self.pattern = text, pattern - self.textLen, self.patLen = len(text), len(pattern) - - def match_In_Pattern(self, char): - - """ finds the index of char in pattern in reverse order - - Paremeters : - char (chr): character to be searched - - Returns : - i (int): index of char from last in pattern - -1 (int): if char is not found in pattern - """ - - for i in range(self.patLen-1, -1, -1): - if char == self.pattern[i]: - return i - return -1 - - - def misMatch_In_Text(self, currentPos): - - """ finds the index of mis-matched character in text when compared with pattern from last - - Paremeters : - currentPos (int): current index position of text - - Returns : - i (int): index of mismatched char from last in text - -1 (int): if there is no mis-match between pattern and text block - """ - - for i in range(self.patLen-1, -1, -1): - if self.pattern[i] != self.text[currentPos + i]: - return currentPos + i - return -1 - - def bad_Character_Heuristic(self): - - """ searches pattern in text and returns 
index positions """ - positions = [] - for i in range(self.textLen - self.patLen + 1): - misMatch_Index = self.misMatch_In_Text(i) - if misMatch_Index == -1: - positions.append(i) - else: - match_Index = self.match_In_Pattern(self.text[misMatch_Index]) - i = misMatch_Index - match_Index #shifting index - return positions - - -text = "ABAABA" -pattern = "AB" -bms = BoyerMooreSearch(text, pattern) -positions = bms.bad_Character_Heuristic() -if len(positions) == 0: - print("No match found") -else: - print("Pattern found in following positions: ") - print(positions) - - From 59858feac517e21e2381f71e0e5ef60a6374565b Mon Sep 17 00:00:00 2001 From: dharni0607 Date: Wed, 3 Jul 2019 14:34:23 +0530 Subject: [PATCH 3/3] additional file in divide_and_conqure (closest pair of points) --- divide_and_conquer/closest_pair_of_points.py | 96 ++++++++++++++----- ...x_sub_array_sum.py => max_subarray_sum.py} | 19 ++-- 2 files changed, 82 insertions(+), 33 deletions(-) rename divide_and_conquer/{max_sub_array_sum.py => max_subarray_sum.py} (67%) diff --git a/divide_and_conquer/closest_pair_of_points.py b/divide_and_conquer/closest_pair_of_points.py index f06abce95ffa..cc5be428db79 100644 --- a/divide_and_conquer/closest_pair_of_points.py +++ b/divide_and_conquer/closest_pair_of_points.py @@ -1,66 +1,112 @@ """ -The algorithm finds distance between closest pair of points in the given n points. -Approach: Divide and conquer -The points are sorted based on x-cords +The algorithm finds distance btw closest pair of points in the given n points. +Approach used -> Divide and conquer +The points are sorted based on Xco-ords & by applying divide and conquer approach, minimum distance is obtained recursively. -Edge case: closest points lie on different sides of partition +>> closest points lie on different sides of partition This case handled by forming a strip of points -which are at a distance (< closest_pair_dis) from mid-point. 
-(It is a proven that strip contains at most 6 points) -And brute force method is applied on strip to find closest points. +whose x-coordinate distance from the mid-point's x-coordinate +is less than closest_pair_dis. +The closest pair distance within the strip is then computed (closest_in_strip). -Time complexity: O(n * (logn) ^ 2) +min(closest_pair_dis, closest_in_strip) is the final answer. + +Time complexity: O(n * (logn)^2) """ import math -def euclidean_distance(point1, point2): - return math.sqrt(pow(point1[0] - point2[0], 2) + pow(point1[1] - point2[1], 2)) +def euclidean_distance_sqr(point1, point2): + return pow(point1[0] - point2[0], 2) + pow(point1[1] - point2[1], 2) def column_based_sort(array, column = 0): return sorted(array, key = lambda x: x[column]) -#brute force approach to find distance between closest pair points -def dis_btw_closest_pair(points, no_of_points, min_dis = float("inf")): - for i in range(no_of_points - 1): - for j in range(i+1, no_of_points): - current_dis = euclidean_distance(points[i], points[j]) +def dis_between_closest_pair(points, points_counts, min_dis = float("inf")): + """ brute force approach to find the squared distance between the closest pair of points + + Parameters : + points, points_counts, min_dis (list(tuple(int, int)), int, float) + + Returns : + min_dis (float): squared distance between the closest pair of points + + """ + + for i in range(points_counts - 1): + for j in range(i+1, points_counts): + current_dis = euclidean_distance_sqr(points[i], points[j]) + if current_dis < min_dis: + min_dis = current_dis + return min_dis + + +def dis_between_closest_in_strip(points, points_counts, min_dis = float("inf")): + """ squared distance between the closest pair of points in the strip + + Parameters : + points, points_counts, min_dis (list(tuple(int, int)), int, float) + + Returns : + min_dis (float): squared distance between the closest pair of points in the strip (< min_dis) + + """ + + for i in range(min(6, points_counts - 1), points_counts): + for j in range(max(0, i-6), i): + current_dis = 
euclidean_distance_sqr(points[i], points[j]) if current_dis < min_dis: min_dis = current_dis return min_dis -#divide and conquer approach -def closest_pair_of_points(points, no_of_points): +def closest_pair_of_points_sqr(points, points_counts): + """ divide and conquer approach + + Parameters : + points, points_count (list(tuple(int, int)), int) + + Returns : + (float): distance btw closest pair of points + + """ + # base case - if no_of_points <= 3: - return dis_btw_closest_pair(points, no_of_points) + if points_counts <= 3: + return dis_between_closest_pair(points, points_counts) - #recursion - mid = no_of_points//2 + # recursion + mid = points_counts//2 closest_in_left = closest_pair_of_points(points[:mid], mid) - closest_in_right = closest_pair_of_points(points[mid:], no_of_points - mid) + closest_in_right = closest_pair_of_points(points[mid:], points_counts - mid) closest_pair_dis = min(closest_in_left, closest_in_right) - #points which are at a distance (< closest_pair_dis) from mid-point + """ cross_strip contains the points, whose Xcoords are at a + distance(< closest_pair_dis) from mid's Xcoord + """ + cross_strip = [] for point in points: if abs(point[0] - points[mid][0]) < closest_pair_dis: cross_strip.append(point) cross_strip = column_based_sort(cross_strip, 1) - closest_in_strip = dis_btw_closest_pair(cross_strip, + closest_in_strip = dis_between_closest_in_strip(cross_strip, len(cross_strip), closest_pair_dis) return min(closest_pair_dis, closest_in_strip) + +def closest_pair_of_points(points, points_counts): + return math.sqrt(closest_pair_of_points_sqr(points, points_counts)) + -points = [[2, 3], [12, 30], [40, 50], [5, 1], [12, 10]] +points = [(2, 3), (12, 30), (40, 50), (5, 1), (12, 10), (0, 2), (5, 6), (1, 2)] points = column_based_sort(points) print("Distance:", closest_pair_of_points(points, len(points))) diff --git a/divide_and_conquer/max_sub_array_sum.py b/divide_and_conquer/max_subarray_sum.py similarity index 67% rename from 
divide_and_conquer/max_sub_array_sum.py rename to divide_and_conquer/max_subarray_sum.py index 531a45abca6f..0428f4e13768 100644 --- a/divide_and_conquer/max_sub_array_sum.py +++ b/divide_and_conquer/max_subarray_sum.py @@ -1,9 +1,11 @@ """ -Given a array of length n, max_sub_array_sum() finds the maximum of sum of contiguous sub-array using divide and conquer method. +Given an array of length n, max_subarray_sum() finds +the maximum sum of a contiguous sub-array using divide and conquer method. Time complexity : O(n log n) -Ref : INTRODUCTION TO ALGORITHMS THIRD EDITION (section : 4, sub-section : 4.1, page : 70) +Ref : INTRODUCTION TO ALGORITHMS THIRD EDITION +(section : 4, sub-section : 4.1, page : 70) """ @@ -43,11 +45,12 @@ def max_cross_array_sum(array, left, mid, right): return max_sum_of_left + max_sum_of_right -def max_sub_array_sum(array, left, right): - """ This function finds the maximum of sum of contiguous sub-array using divide and conquer method +def max_subarray_sum(array, left, right): + """ Maximum contiguous sub-array sum, using divide and conquer method Parameters : - array, left, right (list[int], int, int) : given array, current left index and current right index + array, left, right (list[int], int, int) : + given array, current left index and current right index Returns : int : maximum of sum of contiguous sub-array @@ -60,13 +63,13 @@ def max_sub_array_sum(array, left, right): # Recursion mid = (left + right) // 2 - left_half_sum = max_sub_array_sum(array, left, mid) - right_half_sum = max_sub_array_sum(array, mid + 1, right) + left_half_sum = max_subarray_sum(array, left, mid) + right_half_sum = max_subarray_sum(array, mid + 1, right) cross_sum = max_cross_array_sum(array, left, mid, right) return max(left_half_sum, right_half_sum, cross_sum) array = [-2, -5, 6, -2, -3, 1, 5, -6] array_length = len(array) -print("Maximum sum of contiguous subarray:", max_sub_array_sum(array, 0, array_length - 1)) +print("Maximum sum of contiguous 
subarray:", max_subarray_sum(array, 0, array_length - 1))