Skip to content
Snippets Groups Projects
Commit 90058833 authored by Nikolaj Tatti's avatar Nikolaj Tatti
Browse files

merge algorithm

parent 2435be4d
Branches main
No related tags found
No related merge requests found
......@@ -46,12 +46,11 @@ class Oracle:
# GExact algorithm
def greedy_exact(X, k):
def greedy_exact(X, Winit, Sinit, k):
n, m = X.shape
S = np.zeros((k, m))
S[0, :] = 1
W = np.zeros((n, k))
R = X.copy()
S = Sinit.copy()
W = Winit.copy()
R = X - W@S
prevcost = np.inf
bestcost = np.sum(R**2)
rounds = 0
......@@ -97,7 +96,7 @@ def maxseg(o, m):
def est(o, sigma, delta, m):
cost = np.inf
best = None
while sigma > 0:
while sigma >= 0:
i = 1
j = 1
besta = 0
......@@ -114,19 +113,22 @@ def est(o, sigma, delta, m):
else:
i += 1
sigma = besta - delta
if delta == 0: # Special case when sigma is not getting smaller
break
return best
# GEst algorithm
def greedy_est(X, k, eps):
def greedy_est(X, Winit, Sinit, k, eps):
n, m = X.shape
S = np.zeros((k, m))
S[0, :] = 1
W = np.zeros((n, k))
R = X.copy()
S = Sinit.copy()
W = Winit.copy()
R = X - W@S
prevcost = np.inf
bestcost = np.sum(R**2)
rounds = 0
bestS = S.copy()
bestW = W.copy()
while bestcost < prevcost:
prevcost = bestcost
for r in range(1, k):
......@@ -153,8 +155,16 @@ def greedy_est(X, k, eps):
R -= np.outer(W[:, r], S[r, :])
bestcost = np.sum(R**2)
if bestcost < prevcost:
bestS = S.copy()
bestW = W.copy()
else:
bestcost = prevcost # Estimate failed to improve
#print(bestcost)
rounds += 1
return W, S, bestcost, rounds
return bestW, bestS, bestcost, rounds
def extract_ranges(S):
k, m = S.shape
......@@ -251,16 +261,17 @@ def hill(R, w):
return i, j
# IHill algorithm
def iterative_hill(X, W):
def iterative_hill(X, Winit, Sinit):
n, m = X.shape
k = W.shape[1]
S = np.zeros((k, m))
S[0, :] = 1
S = Sinit.copy()
W = Winit.copy()
R = X - W@S
prevcost = np.inf
R = X - W@S
bestcost = np.sum(R**2)
k = W.shape[1]
rounds = 0
while bestcost < prevcost:
prevcost = bestcost
......@@ -284,16 +295,6 @@ def iterative_hill(X, W):
return W, S, bestcost, rounds
def generate_seed(X, k):
n, m = X.shape
S = np.zeros((k, m))
S[0, :] = 1
for r in range(1, k):
i = np.random.randint(m)
j = np.random.randint(m)
S[r, min(i, j):(1 + max(i, j))] = 1
return X@S.T@np.linalg.inv(S@S.T + 10e-10*np.eye(k))
def buildlookup(k):
A = -np.ones((3**k, k), dtype=int)
for i in range(A.shape[0]):
......@@ -335,11 +336,13 @@ def builddecomp(W):
# Iexact algorithm
def iterative_exact(X, W):
def iterative_exact(X, Winit, Sinit):
n, m = X.shape
S = Sinit.copy()
W = Winit.copy()
R = X - W@S
k = W.shape[1]
S = np.zeros((k, m))
S[0, :] = 1
prevcost = np.inf
R = X - W@S
......@@ -384,7 +387,6 @@ def iterative_exact(X, W):
S[ind, j] = 1
c = best[c, j]
S = complete_S(S)
W = X@S.T@np.linalg.inv(S@S.T)
R = X - W@S
......@@ -392,3 +394,75 @@ def iterative_exact(X, W):
rounds += 1
return W, S, bestcost, rounds
def segment(X, k):
m = X.shape[1]
o = Oracle(X)
score = np.zeros((k, m))
ind = np.zeros((k, m), dtype=int)
for i in range(m):
score[0, i] = o.inner_cost(0, i)
ind[0, i] = -1
for r in range(1, k):
for i in range(m):
score[r, i] = score[r - 1, i]
ind[r, i] = ind[r - 1, i]
for j in range(1, i + 1):
cand = o.inner_cost(j, i) + score[r - 1, j - 1]
if cand < score[r, i]:
score[r, i] = cand
ind[r, i] = j - 1
i = m - 1
intervals = []
for r in reversed(range(k)):
intervals.append([ind[r, i] + 1, i])
i = ind[r, i]
if i < 0:
break
return intervals
def intervals2S(intervals):
S = np.zeros((intervals.shape[0], np.max(intervals) + 1))
for i, (a, b) in enumerate(intervals):
S[i, a:(b + 1)] = 1
return S
# Merge algorithm
def bottomup(X, k):
intervals = np.array([(0, X.shape[1] - 1)] + segment(X, 2*k - 1))
while (intervals.shape[0] > k):
q = intervals.shape[0]
costs = np.zeros(q*(q - 1)//2)
ties = np.zeros(q*(q - 1)//2)
cands = [None] * (q*(q - 1)//2)
ind = 0
for i in range(q):
for j in range(i):
a = intervals[i, :]
b = intervals[j, :]
newint = np.array([min(a[0], b[0]), max(a[1], b[1])])
cand = np.zeros((q - 1, 2))
cand = np.vstack((intervals[:j, :], newint, intervals[(1 + j):i, :], intervals[(i + 1):, :]))
S = intervals2S(cand)
S = complete_S(S)
W = X@S.T@np.linalg.inv(S@S.T)
R = X - W@S
costs[ind] = np.sum(R**2)
ties[ind] = newint[1] - newint[0]
cands[ind] = cand
ind += 1
best = np.min(costs)
inds = np.nonzero(costs < best + 10e-10)[0]
intervals = cands[inds[np.argmin(ties[inds])]]
S = intervals2S(intervals)
S = complete_S(S)
W = X@S.T@np.linalg.inv(S@S.T)
R = X - W@S
bestcost = np.sum(R**2)
return W, S, bestcost
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment