|
| 1 | +''' |
| 2 | +developed by: markmelnic |
| 3 | +original repo: https://github.com/markmelnic/Scoring-Algorithm |
| 4 | +
|
| 5 | +Analyse data using a range-based percentual proximity algorithm |
| 6 | +and calculate the linear maximum likelihood estimation. |
| 7 | +The basic principle is that all values supplied will be scaled |
| 8 | +to a range from 0 to 1 and each column's scores will be added |
| 9 | +up to get the total score of each row. |
| 10 | +
|
| 11 | +========== |
| 12 | +Example for data of vehicles |
| 13 | +price|mileage|registration_year |
| 14 | +20k |60k |2012 |
| 15 | +22k |50k |2011 |
| 16 | +23k |90k |2015 |
| 17 | +16k |210k |2010 |
| 18 | +
|
| 19 | +We want the vehicle with the lowest price, |
| 20 | +lowest mileage but newest registration year. |
| 21 | +Thus the weights for each column are as follows: |
| 22 | +[0, 0, 1] |
| 23 | +
|
| 24 | +>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1]) |
| 25 | +[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]] |
| 26 | +''' |
| 27 | + |
| 28 | + |
def procentual_proximity(source_data: list, weights: list) -> list:
    '''
    Score every row of source_data by min-max scaling each column to the
    range 0..1 and summing the scaled values of the row.

    source_data - list of rows, each row a list of numeric values
                  (anything accepted by float())
    weights - int list, one entry per column
        possible values - 0 / 1
        0 if lower values have higher weight in the data set
        1 if higher values have higher weight in the data set

    Columns are paired with weights positionally; a column without a
    matching weight (or a weight without a matching column) is ignored.

    Returns source_data itself: every row is modified in place by
    appending its total score. If no column is scored (empty data or
    empty weights) every row receives a score of 0.

    Raises ValueError if a weight is neither 0 nor 1.
    '''

    # transpose the rows into one list of floats per column
    data_lists = []
    for row in source_data:
        for i, value in enumerate(row):
            # columns are visited left to right, so a missing bucket is
            # always the next one to create
            if i == len(data_lists):
                data_lists.append([])
            data_lists[i].append(float(value))

    # calculating each score, one list per weighted column
    score_lists = []
    for dlist, weight in zip(data_lists, weights):
        mind = min(dlist)
        span = max(dlist) - mind

        # for weight 0 score is 1 - actual score
        if weight == 0:
            if span == 0:
                # constant column: every value is equally "lowest"
                score = [1] * len(dlist)
            else:
                score = [1 - ((item - mind) / span) for item in dlist]

        elif weight == 1:
            if span == 0:
                # constant column: no value stands out as "highest"
                score = [0] * len(dlist)
            else:
                score = [(item - mind) / span for item in dlist]

        # weight not 0 or 1
        else:
            raise ValueError("Invalid weight of %f provided" % (weight))

        score_lists.append(score)

    # initialize final scores; fall back to the row count when nothing was
    # scored so that empty data / empty weights no longer raise IndexError
    score_count = len(score_lists[0]) if score_lists else len(source_data)
    final_scores = [0] * score_count

    # generate final scores (column by column, keeping the summation order)
    for slist in score_lists:
        for j, ele in enumerate(slist):
            final_scores[j] = final_scores[j] + ele

    # append scores to source data (mutates the caller's rows)
    for row, total in zip(source_data, final_scores):
        row.append(total)

    return source_data
0 commit comments