Pythonで実装する画像認識アルゴリズム SLIC 入門
こんにちは。データサイエンスチーム tmtkです。
この記事では、SLIC (Simple Linear Iterative Clustering) を紹介します。紹介にあたって、私がPython 3で実装したものを使って解説していきます。
SLIC (Simple Linear Iterative Clustering) とは、画像認識にかかわるアルゴリズムのひとつです。Radhakrishna Achanta, Appu Shaji, Kevin Smith, Aurelien Lucchi, Pascal Fua, and Sabine Süsstrunkらが2010年に発明したようです(文献[1, 3])。
- 画像を読み込み、グレースケールやRGBであらわされている値をLab色空間に変換する
- 座標$(x, y)$にある色$(l, a, b)$の画素の特徴量として、5次元ユークリッド空間上の点$(l, a, b, x, y)$を使う
- 手順2.で得た画素の特徴量の空間に、k平均法の亜種を適用し、クラスタリングを行う
import sys, math import numpy as np from skimage import io, color
class SLIC:
def __init__(self, k, m=20):
    """
    Store the SLIC parameters. No image is read or processed here.

    k: the number of superpixels.
    m: a parameter to weigh the relative importance of spatial proximity.
    """
    self.iter_max = 10  # number of assignment/update rounds; c.f. the paper.
    self.k, self.m = k, m
def fit(self, img_path):
    """
    Calculate superpixels for the image stored at img_path.

    Runs the initialization step (fit_init) and then the iteration step
    (fit_iter), and returns the mask array self.l, i.e. the cluster label
    assigned to each pixel.
    """
    self.fit_init(img_path)
    self.fit_iter()
    labels = self.l
    return labels
- 画像をLab色空間に変換する
- 位置$(h, w)$にある色$(l, a, b)$の画素を座標$(l, a, b, h, w)$の点とみなす
(コンピュータで処理する都合上、座標は$(x, y)$ではなく$(h, w)$という逆転した順番になっています) - $k$個のクラスタの中心を等間隔に初期化する
(文献[2]では、物体のふちの部分にクラスタの中心点を置くことをさけるために、等間隔でおいたクラスタの中心の周囲3×3画素も見て、その中で勾配(文献[3]を参照)が一番小さい画素にクラスタの中心をおきなおすと説明されていますが、ここではその処理は省略しています) - $i$番目の点と最寄のクラスタの中心の距離$d(i)$を$\infty$に初期化する
- クラスタの直径の近似値$S = \sqrt{N/k}$($N$は画素数)を計算しておく
def fit_init(self, img_path):
    """
    Read the image from img_path, convert to Lab color space,
    and initialize cluster centers.

    After this call the instance holds:
      height, width   -- image dimensions
      pixels          -- one 5-D vector (L, a, b, row, column) per pixel,
                         in row-major order
      size            -- number of pixels
      cluster_center  -- initial centers, placed on a regular grid
      k               -- overwritten with the number of centers actually
                         placed (k_w * k_h), which may differ from the
                         value passed to the constructor
      l, d            -- per-pixel labels (None) and distances (inf)
      S               -- approximate spacing between cluster centers
      metric          -- diagonal weights used by distance()

    Raises ValueError if the loaded array is not of shape (H, W, 3).
    """
    img_rgb = io.imread(img_path)
    if img_rgb.ndim != 3 or img_rgb.shape[2] != 3:
        # ValueError is an Exception subclass, so existing handlers still work.
        raise ValueError("Non RGB file. The shape was {}.".format(img_rgb.shape))
    img_lab = color.rgb2lab(img_rgb)
    self.height, self.width = img_lab.shape[:2]

    # Build the (L, a, b, row, column) vectors in one vectorized step
    # instead of a Python loop over every pixel; order stays row-major.
    rows, cols = np.indices((self.height, self.width))
    features = np.dstack([img_lab, rows, cols]).reshape(-1, 5)
    self.pixels = list(features)
    self.size = len(self.pixels)

    # Initialize cluster centers to be regularly spaced.
    k_w = int(math.sqrt(self.k * self.width / self.height)) + 1
    k_h = int(math.sqrt(self.k * self.height / self.width)) + 1
    self.cluster_center = []
    for h_cnt in range(k_h):
        h = (2 * h_cnt + 1) * self.height // (2 * k_h)
        for w_cnt in range(k_w):
            w = (2 * w_cnt + 1) * self.width // (2 * k_w)
            # Copy so a center never aliases the pixel it started from.
            self.cluster_center.append(self.pixels[h * self.width + w].copy())
    self.k = k_w * k_h

    self.l = [None] * self.size      # The cluster labels
    self.d = [math.inf] * self.size  # The distance between a pixel and the nearest cluster center
    # The approximate distance between cluster centers. Clamped to 1 because
    # int() truncates to 0 when there are more centers than pixels, which
    # would make 1/S**2 below raise ZeroDivisionError.
    self.S = max(1, int(math.sqrt(self.size / self.k)))
    self.metric = np.diagflat([1 / (self.m ** 2)] * 3 + [1 / (self.S ** 2)] * 2)
def fit_iter(self):
    """
    Iteration step.

    Repeats self.iter_max times: for every cluster center, visit the pixels
    in the window [row - S, row + S) x [column - S, column + S) around it
    (clipped to the image) and assign a pixel to this center when the
    center is closer than the pixel's best distance so far; then recompute
    the centers. self.d is not reset here, so the best distances carry
    over from one round to the next.
    """
    for _ in range(self.iter_max):
        for center_idx, center in enumerate(self.cluster_center):
            center_h, center_w = int(center[3]), int(center[4])
            h_lo = max(0, center_h - self.S)
            h_hi = min(self.height, center_h + self.S)
            w_lo = max(0, center_w - self.S)
            w_hi = min(self.width, center_w + self.S)
            for h in range(h_lo, h_hi):
                row_offset = h * self.width
                for w in range(w_lo, w_hi):
                    idx = row_offset + w
                    d = self.distance(self.pixels[idx], center)
                    if d < self.d[idx]:
                        self.d[idx] = d
                        self.l[idx] = center_idx
        self.calc_new_center()


def distance(self, x, y):
    """
    Squared distance between x and y, weighted by self.metric.
    """
    diff = x - y
    return diff.dot(self.metric).dot(diff)


def calc_new_center(self):
    """
    Calculate new cluster centers.

    Each center becomes the mean of the 5-D vectors of the pixels currently
    labelled with it. Pixels whose label is still None are skipped, and a
    cluster with no pixels keeps its previous center: dividing by a zero
    count would yield a NaN center, and fit_iter would then fail on
    int(center[3]) in the next round.
    """
    cnt = [0] * self.k
    sums = [np.zeros(5) for _ in range(self.k)]
    for label, pixel in zip(self.l, self.pixels):
        if label is None:
            continue
        sums[label] += pixel
        cnt[label] += 1
    self.cluster_center = [
        sums[i] / cnt[i] if cnt[i] else self.cluster_center[i]
        for i in range(self.k)
    ]
ここまでの実装で、 SLIC(k=100).fit("choco.jpg") を実行すると、各画素が属するクラスタのラベルのリストが得られます。
def transform(self):
    """
    Returns new image RGB ndarray.

    Every labelled pixel is painted with the mean Lab color of its cluster,
    and the Lab image is converted back with color.lab2rgb. Must be called
    after fit, since it reads self.l and self.pixels.

    A pixel whose label is still None keeps its own Lab color instead of
    raising TypeError on the list index. Clusters with no pixels are left
    at zero rather than divided by a zero count.
    """
    cnt = [0] * self.k
    cluster_color = [np.zeros(3) for _ in range(self.k)]
    for label, pixel in zip(self.l, self.pixels):
        if label is None:
            continue
        cluster_color[label] += pixel[:3]
        cnt[label] += 1
    for i in range(self.k):
        if cnt[i]:
            cluster_color[i] /= cnt[i]

    new_img_lab = np.zeros((self.height, self.width, 3))
    for h in range(self.height):
        for w in range(self.width):
            idx = h * self.width + w
            label = self.l[idx]
            if label is None:
                new_img_lab[h][w] = self.pixels[idx][:3]
            else:
                new_img_lab[h][w] = cluster_color[label]
    return color.lab2rgb(new_img_lab)
# Segment the sample image into roughly 100 superpixels, then display the
# image in which every superpixel is filled with its mean color.
slic = SLIC(k = 100)
slic.fit("choco.jpg")  # transform() reads the labels computed by fit()
res = slic.transform()
io.imshow(res)
from skimage import io, segmentation, color

# The same kind of result using scikit-image's built-in SLIC:
# segment the image, fill each segment with its average color, and save it.
image = io.imread("choco.jpg")
segments = segmentation.slic(image, compactness=20)
averaged = color.label2rgb(segments, image, kind = 'avg')
io.imsave("lena_skimage.png", averaged)
パラメータを自動的に決めるSLICOという手法もあります(文献[1, 2])。
""" SLIC implementation in Python 3 """
import sys, math

import numpy as np
from skimage import io, color


class SLIC:
    """
    SLIC (Simple Linear Iterative Clustering) superpixel segmentation.

    Each pixel is represented as a 5-D vector (L, a, b, row, column) and the
    pixels are clustered with a k-means variant that only searches a local
    window around each cluster center.

    Usage: call fit(img_path) to compute the per-pixel labels, then
    transform() to get an RGB image in which every superpixel is filled
    with its mean color.
    """

    def __init__(self, k, m=20):
        """
        Constructor.

        k: the number of superpixels.
        m: a parameter to weigh the relative importance of spatial proximity.
        """
        self.k = k
        self.m = m
        self.iter_max = 10  # c.f. the paper.

    def fit(self, img_path):
        """
        Calculate superpixels.

        Returns the mask array (self.l: one cluster label per pixel,
        in row-major order).
        """
        self.fit_init(img_path)
        self.fit_iter()
        return self.l

    def fit_init(self, img_path):
        """
        Read the image from img_path, convert to Lab color space,
        and initialize cluster centers.

        Note that self.k is overwritten with the number of centers actually
        placed on the grid (k_w * k_h), which may differ from the value
        passed to the constructor.

        Raises ValueError if the loaded array is not of shape (H, W, 3).
        """
        img_rgb = io.imread(img_path)
        if img_rgb.ndim != 3 or img_rgb.shape[2] != 3:
            # ValueError is an Exception subclass, so existing handlers still work.
            raise ValueError("Non RGB file. The shape was {}.".format(img_rgb.shape))
        img_lab = color.rgb2lab(img_rgb)
        self.height, self.width = img_lab.shape[:2]

        # Build the (L, a, b, row, column) vectors in one vectorized step
        # instead of a Python loop over every pixel; order stays row-major.
        rows, cols = np.indices((self.height, self.width))
        features = np.dstack([img_lab, rows, cols]).reshape(-1, 5)
        self.pixels = list(features)
        self.size = len(self.pixels)

        # Initialize cluster centers to be regularly spaced.
        k_w = int(math.sqrt(self.k * self.width / self.height)) + 1
        k_h = int(math.sqrt(self.k * self.height / self.width)) + 1
        self.cluster_center = []
        for h_cnt in range(k_h):
            h = (2 * h_cnt + 1) * self.height // (2 * k_h)
            for w_cnt in range(k_w):
                w = (2 * w_cnt + 1) * self.width // (2 * k_w)
                # Copy so a center never aliases the pixel it started from.
                self.cluster_center.append(self.pixels[h * self.width + w].copy())
        self.k = k_w * k_h

        self.l = [None] * self.size      # The cluster labels
        self.d = [math.inf] * self.size  # The distance between a pixel and the nearest cluster center
        # The approximate distance between cluster centers. Clamped to 1
        # because int() truncates to 0 when there are more centers than
        # pixels, which would make 1/S**2 below raise ZeroDivisionError.
        self.S = max(1, int(math.sqrt(self.size / self.k)))
        self.metric = np.diagflat([1 / (self.m ** 2)] * 3 + [1 / (self.S ** 2)] * 2)

    def fit_iter(self):
        """
        Iteration step.

        Repeats self.iter_max times: for every cluster center, visit the
        pixels in the window [row - S, row + S) x [column - S, column + S)
        around it (clipped to the image) and assign a pixel to this center
        when the center is closer than the pixel's best distance so far;
        then recompute the centers. self.d is set once in fit_init and is
        not reset between rounds.
        """
        for _ in range(self.iter_max):
            for center_idx, center in enumerate(self.cluster_center):
                center_h, center_w = int(center[3]), int(center[4])
                h_lo = max(0, center_h - self.S)
                h_hi = min(self.height, center_h + self.S)
                w_lo = max(0, center_w - self.S)
                w_hi = min(self.width, center_w + self.S)
                for h in range(h_lo, h_hi):
                    row_offset = h * self.width
                    for w in range(w_lo, w_hi):
                        idx = row_offset + w
                        d = self.distance(self.pixels[idx], center)
                        if d < self.d[idx]:
                            self.d[idx] = d
                            self.l[idx] = center_idx
            self.calc_new_center()

    def distance(self, x, y):
        """
        Squared distance between x and y, weighted by self.metric.
        """
        diff = x - y
        return diff.dot(self.metric).dot(diff)

    def calc_new_center(self):
        """
        Calculate new cluster centers.

        Each center becomes the mean of the 5-D vectors of the pixels
        currently labelled with it. Pixels whose label is still None are
        skipped, and a cluster with no pixels keeps its previous center:
        dividing by a zero count would yield a NaN center, and fit_iter
        would then fail on int(center[3]) in the next round.
        """
        cnt = [0] * self.k
        sums = [np.zeros(5) for _ in range(self.k)]
        for label, pixel in zip(self.l, self.pixels):
            if label is None:
                continue
            sums[label] += pixel
            cnt[label] += 1
        self.cluster_center = [
            sums[i] / cnt[i] if cnt[i] else self.cluster_center[i]
            for i in range(self.k)
        ]

    def transform(self):
        """
        Returns new image RGB ndarray.

        Every labelled pixel is painted with the mean Lab color of its
        cluster, and the Lab image is converted back with color.lab2rgb.
        Must be called after fit.

        A pixel whose label is still None keeps its own Lab color instead
        of raising TypeError on the list index. Clusters with no pixels are
        left at zero rather than divided by a zero count.
        """
        cnt = [0] * self.k
        cluster_color = [np.zeros(3) for _ in range(self.k)]
        for label, pixel in zip(self.l, self.pixels):
            if label is None:
                continue
            cluster_color[label] += pixel[:3]
            cnt[label] += 1
        for i in range(self.k):
            if cnt[i]:
                cluster_color[i] /= cnt[i]

        new_img_lab = np.zeros((self.height, self.width, 3))
        for h in range(self.height):
            for w in range(self.width):
                idx = h * self.width + w
                label = self.l[idx]
                if label is None:
                    new_img_lab[h][w] = self.pixels[idx][:3]
                else:
                    new_img_lab[h][w] = cluster_color[label]
        return color.lab2rgb(new_img_lab)
- Superpixel segmentation | IVRL
- Radhakrishna Achanta, Appu Shaji, Kevin Smith, Aurelien Lucchi, Pascal Fua, and Sabine Süsstrunk, SLIC Superpixels Compared to State-of-the-art Superpixel Methods, IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 34, num. 11, p. 2274 – 2282, May 2012.
- Radhakrishna Achanta, Appu Shaji, Kevin Smith, Aurelien Lucchi, Pascal Fua, and Sabine Süsstrunk, SLIC Superpixels, EPFL Technical Report no. 149300, June 2010.
- k平均法 – Wikipedia
- Lab色空間 – Wikipedia
- バレンタインのチョコレートケーキを焼く女性|ぱくたそフリー素材
- scikit-image: Image processing in Python — scikit-image
- Normalized Cut — skimage v0.14dev docs
テックブログ新着情報のほか、AWSやGoogle Cloudに関するお役立ち情報を配信中!
Follow @twitterデータ分析と機械学習とソフトウェア開発をしています。 アルゴリズムとデータ構造が好きです。
Special Topics
AWSの料金が 10 %割引になる!