Here, our given data-set is :
We implement the K-medoid algorithm for this dataset as follows:
A1
|
A2
|
|
O1
|
2
|
6
|
O2
|
3
|
4
|
O3
|
3
|
8
|
O4
|
4
|
7
|
O5
|
6
|
2
|
O6
|
6
|
4
|
O7
|
7
|
3
|
O8
|
7
|
4
|
O9
|
8
|
5
|
O10
|
7
|
6
|
import random
data_set = [[2,6],[3,4],[3,8],[4,7],[6,2],[6,4],[7,3],[7,4],[8,5],[7,6]]
median = [1,7]
m = [[3,4],[7,4]]
c1 = []
c2 = []
count = 0
def assign_mediod():
median.append(random.randint(0,9))
while True:
a = random.randint(0,9)
if a <> median[0]:
median.append(a)
break
print(median)
m.append(data_set[median[0]])
m.append(data_set[median[1]])
def manhattan_distance(a,b):
return abs(a[0]-b[0]) + abs(a[1]-b[1])
def create_cluster():
print("data_set = ",data_set)
for i in range(0,10):
if i == median[0] or i == median[1]:
continue
else:
if manhattan_distance(data_set[i],m[0]) < manhattan_distance(data_set[i],m[1]):
c1.append(data_set[i])
else:
c2.append(data_set[i])
def AE(c1,c2,m):
total = 0
for o in c1:
total = total + manhattan_distance(o,m[0])
for o in c2:
total = total + manhattan_distance(o,m[1])
return total
def Check_for_Swap():
global m
global median
global c1
global c2
global count
count = count + 1
swap = False
tm = list(m)
tc1 = list(c1)
tc2 = list(c2)
for i in range(0,len(c1)):
temp = tm[0]
tm[0] = tc1[i]
tc1[i] = temp
print("Cost Function for old - ",c1,",",c2,",",m," = ",AE(c1,c2,m))
print("Cost Function for new - ",tc1,",",tc2,",",tm," = ",AE(tc1,tc2,tm))
if (AE(tc1,tc2,tm) - AE(c1,c2,m)) < 0:
m = list(tm)
swap = True
break
print("c1 iteration = ",i)
for i in range(0,len(c2)):
temp = tm[1]
tm[1] = tc2[i]
tc2[i] = temp
print("Cost Function for old - ",c1,",",c2,",",m," = ",AE(c1,c2,m))
print("Cost Function for new - ",tc1,",",tc2,",",tm," = ",AE(tc1,tc2,tm))
if (AE(tc1,tc2,tm) - AE(c1,c2,m)) < 0:
m = list(tm)
swap = True
break
print("c2 iteration = ",i)
median = [data_set.index(m[0]), data_set.index(m[1])]
print ("After iteration -",count)
print ("medoids = ",m)
print("medians = ",median)
print("c1 = ",c1)
print("c2 = ",c2)
if swap == True:
c1 = []
c2 = []
create_cluster()
Check_for_Swap()
else:
print(count)
print(m)
print(median)
print(c1)
print(c2)
if __name__ == "__main__":
assign_mediod()
print(m)
print(median)
create_cluster()
print(c1)
print(c2)
Check_for_Swap()
Let us suppose that the two medoids, the program randomly assigns are [O2,O7], then we get the following output
[[3, 4], [7, 4]]
[1, 7]
('data_set = ', [[2, 6], [3, 4], [3, 8], [4, 7], [6, 2], [6, 4], [7, 3], [7, 4], [8, 5], [7, 6]])
[[2, 6], [3, 8], [4, 7]]
[[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]]
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[3, 4], [7, 4]], ' = ', 20)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 15)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[7, 4], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [6, 2]], ' = ', 26)
('c2 iteration = ', 0)
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 15)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[7, 4], [6, 2], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [6, 4]], ' = ', 20)
('c2 iteration = ', 1)
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 15)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[7, 4], [6, 2], [6, 4], [8, 5], [7, 6]], ',', [[2, 6], [7, 3]], ' = ', 20)
('c2 iteration = ', 2)
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 15)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[7, 4], [6, 2], [6, 4], [7, 3], [7, 6]], ',', [[2, 6], [8, 5]], ' = ', 24)
('c2 iteration = ', 3)
('Cost Function for old - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 15)
('Cost Function for new - ', [[3, 4], [3, 8], [4, 7]], ',', [[7, 4], [6, 2], [6, 4], [7, 3], [8, 5]], ',', [[2, 6], [7, 6]], ' = ', 24)
('c2 iteration = ', 4)
('After iteration -', 1)
('medoids = ', [[2, 6], [7, 4]])
('medians = ', [0, 7])
('c1 = ', [[2, 6], [3, 8], [4, 7]])
('c2 = ', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]])
('data_set = ', [[2, 6], [3, 4], [3, 8], [4, 7], [6, 2], [6, 4], [7, 3], [7, 4], [8, 5], [7, 6]])
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[3, 4], [7, 4]], ' = ', 20)
('c1 iteration = ', 0)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[3, 8], [7, 4]], ' = ', 18)
('c1 iteration = ', 1)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[4, 7], [7, 4]], ' = ', 18)
('c1 iteration = ', 2)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[7, 4], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[4, 7], [6, 2]], ' = ', 26)
('c2 iteration = ', 0)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[7, 4], [6, 2], [7, 3], [8, 5], [7, 6]], ',', [[4, 7], [6, 4]], ' = ', 20)
('c2 iteration = ', 1)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[7, 4], [6, 2], [6, 4], [8, 5], [7, 6]], ',', [[4, 7], [7, 3]], ' = ', 20)
('c2 iteration = ', 2)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[7, 4], [6, 2], [6, 4], [7, 3], [7, 6]], ',', [[4, 7], [8, 5]], ' = ', 24)
('c2 iteration = ', 3)
('Cost Function for old - ', [[3, 4], [3, 8], [4, 7]], ',', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]], ',', [[2, 6], [7, 4]], ' = ', 18)
('Cost Function for new - ', [[2, 6], [3, 4], [3, 8]], ',', [[7, 4], [6, 2], [6, 4], [7, 3], [8, 5]], ',', [[4, 7], [7, 6]], ' = ', 24)
('c2 iteration = ', 4)
('After iteration -', 2)
('medoids = ', [[2, 6], [7, 4]])
('medians = ', [0, 7])
('c1 = ', [[3, 4], [3, 8], [4, 7]])
('c2 = ', [[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]])
2
[[2, 6], [7, 4]]
[0, 7]
[[3, 4], [3, 8], [4, 7]]
[[6, 2], [6, 4], [7, 3], [8, 5], [7, 6]]
>>>
No comments:
Post a Comment