merge algorithm

90058833 · Nikolaj Tatti · 2435be4d · 90058833
Commit 90058833 authored 2 years ago by Nikolaj Tatti
--- a/c1decomp.py
+++ b/c1decomp.py
@@ -46,12 +46,11 @@ class Oracle:


 # GExact algorithm
-def greedy_exact(X, k):
+def greedy_exact(X, Winit, Sinit, k):
    n, m = X.shape
-    S = np.zeros((k, m))
-    S[0, :] = 1
-    W = np.zeros((n, k))
-    R = X.copy()
+    S = Sinit.copy()
+    W = Winit.copy()
+    R = X - W@S
    prevcost = np.inf
    bestcost = np.sum(R**2)
    rounds = 0
@@ -97,7 +96,7 @@ def maxseg(o, m):
 def est(o, sigma, delta, m):
    cost = np.inf
    best = None
-    while sigma > 0:
+    while sigma >= 0:
        i = 1
        j = 1
        besta = 0
@@ -114,19 +113,22 @@ def est(o, sigma, delta, m):
            else:
                i += 1
        sigma = besta - delta
-
+        if delta == 0: # Special case when sigma is not getting smaller
+            break
    return best

+
 # GEst algorithm
-def greedy_est(X, k, eps):
+def greedy_est(X, Winit, Sinit, k, eps):
    n, m = X.shape
-    S = np.zeros((k, m))
-    S[0, :] = 1
-    W = np.zeros((n, k))
-    R = X.copy()
+    S = Sinit.copy()
+    W = Winit.copy()
+    R = X - W@S
    prevcost = np.inf
    bestcost = np.sum(R**2)
    rounds = 0
+    bestS = S.copy()
+    bestW = W.copy()
    while bestcost < prevcost:
        prevcost = bestcost
        for r in range(1, k):
@@ -153,8 +155,16 @@ def greedy_est(X, k, eps):
            R -= np.outer(W[:, r], S[r, :])

        bestcost = np.sum(R**2)
+        if bestcost < prevcost:
+            bestS = S.copy()
+            bestW = W.copy()
+        else:
+            bestcost = prevcost # Estimate failed to improve
+        #print(bestcost)
        rounds += 1
-    return W, S, bestcost, rounds
+    return bestW, bestS, bestcost, rounds
+
+

 def extract_ranges(S):
    k, m = S.shape
@@ -251,16 +261,17 @@ def hill(R, w):
    return i, j

 # IHill algorithm
-def iterative_hill(X, W):
+def iterative_hill(X, Winit, Sinit):
    n, m = X.shape
-    k = W.shape[1]
-    S = np.zeros((k, m))
-    S[0, :] = 1
+    S = Sinit.copy()
+    W = Winit.copy()
+    R = X - W@S

    prevcost = np.inf
-    R = X - W@S
    bestcost = np.sum(R**2)

+    k = W.shape[1]
+
    rounds = 0
    while bestcost < prevcost:
        prevcost = bestcost
@@ -284,16 +295,6 @@ def iterative_hill(X, W):
    return W, S, bestcost, rounds


-def generate_seed(X, k):
-    n, m = X.shape
-    S = np.zeros((k, m))
-    S[0, :] = 1
-    for r in range(1, k):
-        i = np.random.randint(m)
-        j = np.random.randint(m)
-        S[r, min(i, j):(1 + max(i, j))] = 1
-    return X@S.T@np.linalg.inv(S@S.T + 10e-10*np.eye(k))
-
 def buildlookup(k):
    A = -np.ones((3**k, k), dtype=int)
    for i in range(A.shape[0]):
@@ -335,11 +336,13 @@ def builddecomp(W):


 # Iexact algorithm
-def iterative_exact(X, W):
+def iterative_exact(X, Winit, Sinit):
    n, m = X.shape
+    S = Sinit.copy()
+    W = Winit.copy()
+    R = X - W@S
+
    k = W.shape[1]
-    S = np.zeros((k, m))
-    S[0, :] = 1

    prevcost = np.inf
    R = X - W@S
@@ -384,7 +387,6 @@ def iterative_exact(X, W):
            S[ind, j] = 1
            c = best[c, j]

-
        S = complete_S(S)
        W = X@S.T@np.linalg.inv(S@S.T)
        R = X - W@S
@@ -392,3 +394,75 @@ def iterative_exact(X, W):
        rounds += 1

    return W, S, bestcost, rounds
+
+
+def segment(X, k):
+    m = X.shape[1]
+    o = Oracle(X)
+    score = np.zeros((k, m))
+    ind = np.zeros((k, m), dtype=int)
+    for i in range(m):
+        score[0, i] = o.inner_cost(0, i)
+        ind[0, i] = -1
+
+    for r in range(1, k):
+        for i in range(m):
+            score[r, i] = score[r - 1, i]
+            ind[r, i] = ind[r - 1, i]
+            for j in range(1, i + 1):
+                cand = o.inner_cost(j, i) + score[r - 1, j - 1]
+                if cand < score[r, i]:
+                    score[r, i] = cand
+                    ind[r, i] = j - 1
+    i = m - 1
+    intervals = []
+    for r in reversed(range(k)):
+        intervals.append([ind[r, i] + 1, i])
+        i = ind[r, i]
+        if i < 0:
+            break
+    return intervals
+
+def intervals2S(intervals):
+    S = np.zeros((intervals.shape[0], np.max(intervals) + 1))
+    for i, (a, b) in enumerate(intervals):
+        S[i, a:(b + 1)] = 1
+    return S
+
+# Merge algorithm
+def bottomup(X, k):
+    intervals = np.array([(0, X.shape[1] - 1)] + segment(X, 2*k - 1))
+
+    while (intervals.shape[0] > k):
+        q = intervals.shape[0]
+        costs = np.zeros(q*(q - 1)//2)
+        ties = np.zeros(q*(q - 1)//2)
+        cands = [None] * (q*(q - 1)//2)
+        ind = 0
+        for i in range(q):
+            for j in range(i):
+                a = intervals[i, :]
+                b = intervals[j, :]
+                newint = np.array([min(a[0], b[0]), max(a[1], b[1])])
+                cand = np.zeros((q - 1, 2))
+                cand = np.vstack((intervals[:j, :], newint, intervals[(1 + j):i, :], intervals[(i + 1):, :]))
+                S = intervals2S(cand)
+                S = complete_S(S)
+                W = X@S.T@np.linalg.inv(S@S.T)
+                R = X - W@S
+                costs[ind] = np.sum(R**2)
+                ties[ind] = newint[1] - newint[0]
+                cands[ind] = cand
+                ind += 1
+
+        best = np.min(costs)
+        inds = np.nonzero(costs < best + 10e-10)[0]
+        intervals = cands[inds[np.argmin(ties[inds])]]
+
+    S = intervals2S(intervals)
+    S = complete_S(S)
+    W = X@S.T@np.linalg.inv(S@S.T)
+    R = X - W@S
+    bestcost = np.sum(R**2)
+    return W, S, bestcost
+