diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 738c02e8e..0835ebbf7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,11 +15,11 @@ repos:
       name: Check file encoding
      entry: bash -c 'for file in "$@"; do file --mime-encoding $file | grep -q "ascii\|binary"; if [ $? != 0 ]; then echo $file; exit 1; fi; done' --
       types: [text]
-  # - id: flake8
-  #   name: Check Python format
-  #   entry: flake8 --count --show-source --statistics
-  #   language: system
-  #   types: [python]
+  - id: flake8
+    name: Check Python format
+    entry: flake8 --count --show-source --statistics
+    language: system
+    types: [python]
   - id: unittest
     name: Run Python unittests
     language: system
diff --git a/prml/bayesnet/discrete.py b/prml/bayesnet/discrete.py
index c74cb6bb6..8a85c81f5 100644
--- a/prml/bayesnet/discrete.py
+++ b/prml/bayesnet/discrete.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.bayesnet.probability_function import ProbabilityFunction
 from prml.bayesnet.random_variable import RandomVariable
 
@@ -35,7 +36,7 @@ def __init__(self, n_class:int):
         self.is_observed = False
 
     def __repr__(self):
-        string = f"DiscreteVariable("
+        string = "DiscreteVariable("
         if self.is_observed:
             string += f"observed={self.proba})"
         else:
@@ -201,7 +202,8 @@ def send_message(self, proprange, exclude=None):
             if random_variable is not exclude:
                 self.send_message_to(random_variable, proprange)
 
-        if proprange == 0: return
+        if proprange == 0:
+            return
 
         for random_variable in self.condition:
             if random_variable is not exclude:
diff --git a/prml/clustering/__init__.py b/prml/clustering/__init__.py
index 169827c8a..aeb18a1df 100644
--- a/prml/clustering/__init__.py
+++ b/prml/clustering/__init__.py
@@ -1,4 +1,4 @@
-from .k_means import KMeans
+from prml.clustering.k_means import KMeans
 
 
 __all__ = [
diff --git a/prml/clustering/k_means.py b/prml/clustering/k_means.py
index d3f890bc8..89f02b52a 100644
--- a/prml/clustering/k_means.py
+++ b/prml/clustering/k_means.py
@@ -7,13 +7,13 @@ class KMeans(object):
     def __init__(self, n_clusters):
         self.n_clusters = n_clusters
 
-    def fit(self, X, iter_max=100):
+    def fit(self, x, iter_max=100):
         """
         perform k-means algorithm
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
         iter_max : int
             maximum number of iterations
@@ -23,25 +23,25 @@ def fit(self, X, iter_max=100):
         centers : (n_clusters, n_features) ndarray
             center of each cluster
         """
-        I = np.eye(self.n_clusters)
-        centers = X[np.random.choice(len(X), self.n_clusters, replace=False)]
+        eye = np.eye(self.n_clusters)
+        centers = x[np.random.choice(len(x), self.n_clusters, replace=False)]
         for _ in range(iter_max):
             prev_centers = np.copy(centers)
-            D = cdist(X, centers)
+            D = cdist(x, centers)
             cluster_index = np.argmin(D, axis=1)
-            cluster_index = I[cluster_index]
-            centers = np.sum(X[:, None, :] * cluster_index[:, :, None], axis=0) / np.sum(cluster_index, axis=0)[:, None]
+            cluster_index = eye[cluster_index]
+            centers = np.sum(x[:, None, :] * cluster_index[:, :, None], axis=0) / np.sum(cluster_index, axis=0)[:, None]
             if np.allclose(prev_centers, centers):
                 break
         self.centers = centers
 
-    def predict(self, X):
+    def predict(self, x):
         """
         calculate closest cluster center index
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
 
         Returns
@@ -49,5 +49,5 @@ def predict(self, X):
         index : (sample_size,) ndarray
             indicates which cluster they belong
         """
-        D = cdist(X, self.centers)
+        D = cdist(x, self.centers)
         return np.argmin(D, axis=1)
diff --git a/prml/dimreduction/autoencoder.py b/prml/dimreduction/autoencoder.py
index e5c19edfa..09f8d1868 100644
--- a/prml/dimreduction/autoencoder.py
+++ b/prml/dimreduction/autoencoder.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml import nn
 
 
diff --git a/prml/dimreduction/bayesian_pca.py b/prml/dimreduction/bayesian_pca.py
index 9e9e280d6..247b78f90 100644
--- a/prml/dimreduction/bayesian_pca.py
+++ b/prml/dimreduction/bayesian_pca.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.dimreduction.pca import PCA
 
 
@@ -26,7 +27,7 @@ def fit(self, X, iter_max=100, initial="random"):
         """
         initial_list = ["random", "eigen"]
         self.mean = np.mean(X, axis=0)
-        self.I = np.eye(self.n_components)
+        self.eye = np.eye(self.n_components)
         if initial not in initial_list:
             print("availabel initializations are {}".format(initial_list))
         if initial == "random":
diff --git a/prml/dimreduction/pca.py b/prml/dimreduction/pca.py
index 55d7b7f8e..72c96feee 100644
--- a/prml/dimreduction/pca.py
+++ b/prml/dimreduction/pca.py
@@ -15,14 +15,14 @@ def __init__(self, n_components):
         assert isinstance(n_components, int)
         self.n_components = n_components
 
-    def fit(self, X, method="eigen", iter_max=100):
-        """
-        maximum likelihood estimate of pca parameters
+    def fit(self, x, method="eigen", iter_max=100):
+        r"""Maximum likelihood estimate of pca parameters.
+
+        x ~ \int_z N(x|Wz+mu,sigma^2)N(z|0,I)dz
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
         method : str
             method to estimate the parameters
@@ -46,69 +46,69 @@ def fit(self, X, method="eigen", iter_max=100):
         method_list = ["eigen", "em"]
         if method not in method_list:
             print("availabel methods are {}".format(method_list))
-        self.mean = np.mean(X, axis=0)
-        getattr(self, method)(X - self.mean, iter_max)
+        self.mean = np.mean(x, axis=0)
+        getattr(self, method)(x - self.mean, iter_max)
 
-    def eigen(self, X, *arg):
-        sample_size, n_features = X.shape
+    def eigen(self, x, *arg):
+        sample_size, n_features = x.shape
         if sample_size >= n_features:
-            cov = np.cov(X, rowvar=False)
+            cov = np.cov(x, rowvar=False)
             values, vectors = np.linalg.eigh(cov)
             index = n_features - self.n_components
         else:
-            cov = np.cov(X)
+            cov = np.cov(x)
             values, vectors = np.linalg.eigh(cov)
-            vectors = (X.T @ vectors) / np.sqrt(sample_size * values)
+            vectors = (x.T @ vectors) / np.sqrt(sample_size * values)
             index = sample_size - self.n_components
-        self.I = np.eye(self.n_components)
+        self.eye = np.eye(self.n_components)
         if index == 0:
             self.var = 0
         else:
             self.var = np.mean(values[:index])
-        self.W = vectors[:, index:].dot(np.sqrt(np.diag(values[index:]) - self.var * self.I))
-        self.__M = self.W.T @ self.W + self.var * self.I
+        self.W = vectors[:, index:].dot(np.sqrt(np.diag(values[index:]) - self.var * self.eye))
+        self.__M = self.W.T @ self.W + self.var * self.eye
         self.C = self.W @ self.W.T + self.var * np.eye(n_features)
         if index == 0:
             self.Cinv = np.linalg.inv(self.C)
         else:
             self.Cinv = np.eye(n_features) / np.sqrt(self.var) - self.W @ np.linalg.inv(self.__M) @ self.W.T / self.var
 
-    def em(self, X, iter_max):
-        self.I = np.eye(self.n_components)
-        self.W = np.eye(np.size(X, 1), self.n_components)
+    def em(self, x, iter_max):
+        self.eye = np.eye(self.n_components)
+        self.W = np.eye(np.size(x, 1), self.n_components)
         self.var = 1.
         for i in range(iter_max):
             W = np.copy(self.W)
-            stats = self._expectation(X)
-            self._maximization(X, *stats)
+            stats = self._expectation(x)
+            self._maximization(x, *stats)
             if np.allclose(W, self.W):
                 break
-        self.C = self.W @ self.W.T + self.var * np.eye(np.size(X, 1))
+        self.C = self.W @ self.W.T + self.var * np.eye(np.size(x, 1))
         self.Cinv = np.linalg.inv(self.C)
 
-    def _expectation(self, X):
-        self.__M = self.W.T @ self.W + self.var * self.I
+    def _expectation(self, x):
+        self.__M = self.W.T @ self.W + self.var * self.eye
         Minv = np.linalg.inv(self.__M)
-        Ez = X @ self.W @ Minv
+        Ez = x @ self.W @ Minv
         Ezz = self.var * Minv + Ez[:, :, None] * Ez[:, None, :]
         return Ez, Ezz
 
-    def _maximization(self, X, Ez, Ezz):
-        self.W = X.T @ Ez @ np.linalg.inv(np.sum(Ezz, axis=0))
+    def _maximization(self, x, Ez, Ezz):
+        self.W = x.T @ Ez @ np.linalg.inv(np.sum(Ezz, axis=0))
         self.var = np.mean(
-            np.mean(X ** 2, axis=1)
-            - 2 * np.mean(Ez @ self.W.T * X, axis=1)
-            + np.trace((Ezz @ self.W.T @ self.W).T) / np.size(X, 1))
+            np.mean(x ** 2, axis=1)
+            - 2 * np.mean(Ez @ self.W.T * x, axis=1)
+            + np.trace((Ezz @ self.W.T @ self.W).T) / np.size(x, 1))
 
-    def transform(self, X):
+    def transform(self, x):
         """
         project input data into latent space
-        p(Z|X) = N(Z|(X-mu)WMinv, sigma^-2M)
+        p(Z|x) = N(Z|(x-mu)WMinv, sigma^-2M)
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
 
         Returns
@@ -116,15 +116,15 @@ def transform(self, X):
         Z : (sample_size, n_components) ndarray
             projected input data
         """
-        return np.linalg.solve(self.__M, ((X - self.mean) @ self.W).T).T
+        return np.linalg.solve(self.__M, ((x - self.mean) @ self.W).T).T
 
-    def fit_transform(self, X, method="eigen"):
+    def fit_transform(self, x, method="eigen"):
         """
         perform pca and whiten the input data
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
 
         Returns
@@ -132,16 +132,16 @@ def fit_transform(self, X, method="eigen"):
         Z : (sample_size, n_components) ndarray
             projected input data
         """
-        self.fit(X, method)
-        return self.transform(X)
+        self.fit(x, method)
+        return self.transform(x)
 
-    def proba(self, X):
+    def proba(self, x):
         """
         the marginal distribution of the observed variable
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
 
         Returns
@@ -149,8 +149,8 @@ def proba(self, X):
         p : (sample_size,) ndarray
             value of the marginal distribution
         """
-        d = X - self.mean
+        d = x - self.mean
         return (
             np.exp(-0.5 * np.sum(d @ self.Cinv * d, axis=-1))
             / np.sqrt(np.linalg.det(self.C))
-            / np.power(2 * np.pi, 0.5 * np.size(X, 1)))
+            / np.power(2 * np.pi, 0.5 * np.size(x, 1)))
diff --git a/prml/kernel/__init__.py b/prml/kernel/__init__.py
index 01b72f065..8e6d343a4 100644
--- a/prml/kernel/__init__.py
+++ b/prml/kernel/__init__.py
@@ -1,8 +1,7 @@
-from prml.kernel.polynomial import PolynomialKernel
-from prml.kernel.rbf import RBF
-
 from prml.kernel.gaussian_process_classifier import GaussianProcessClassifier
 from prml.kernel.gaussian_process_regressor import GaussianProcessRegressor
+from prml.kernel.polynomial import PolynomialKernel
+from prml.kernel.rbf import RBF
 from prml.kernel.relevance_vector_classifier import RelevanceVectorClassifier
 from prml.kernel.relevance_vector_regressor import RelevanceVectorRegressor
 from prml.kernel.support_vector_classifier import SupportVectorClassifier
diff --git a/prml/kernel/gaussian_process_classifier.py b/prml/kernel/gaussian_process_classifier.py
index 5fa1500a4..d56c4ca77 100644
--- a/prml/kernel/gaussian_process_classifier.py
+++ b/prml/kernel/gaussian_process_classifier.py
@@ -20,18 +20,18 @@ def __init__(self, kernel, noise_level=1e-4):
     def _sigmoid(self, a):
         return np.tanh(a * 0.5) * 0.5 + 0.5
 
-    def fit(self, X, t):
-        if X.ndim == 1:
-            X = X[:, None]
-        self.X = X
+    def fit(self, x, t):
+        if x.ndim == 1:
+            x = x[:, None]
+        self.x = x
         self.t = t
-        Gram = self.kernel(X, X)
+        Gram = self.kernel(x, x)
         self.covariance = Gram + np.eye(len(Gram)) * self.noise_level
         self.precision = np.linalg.inv(self.covariance)
 
-    def predict(self, X):
-        if X.ndim == 1:
-            X = X[:, None]
-        K = self.kernel(X, self.X)
+    def predict(self, x):
+        if x.ndim == 1:
+            x = x[:, None]
+        K = self.kernel(x, self.x)
         a_mean = K @ self.precision @ self.t
         return self._sigmoid(a_mean)
diff --git a/prml/kernel/gaussian_process_regressor.py b/prml/kernel/gaussian_process_regressor.py
index 4f1aa94fd..af09f75bb 100644
--- a/prml/kernel/gaussian_process_regressor.py
+++ b/prml/kernel/gaussian_process_regressor.py
@@ -49,9 +49,9 @@ def fit(self, X, t, iter_max=0, learning_rate=0.1):
         log_likelihood_list = [-np.inf]
         self.X = X
         self.t = t
-        I = np.eye(len(X))
-        Gram = self.kernel(X, X)
-        self.covariance = Gram + I / self.beta
+        eye = np.eye(len(X))
+        gram = self.kernel(X, X)
+        self.covariance = gram + eye / self.beta
         self.precision = np.linalg.inv(self.covariance)
         for i in range(iter_max):
             gradients = self.kernel.derivatives(X, X)
@@ -59,8 +59,8 @@
             updates = np.array(
                 [-np.trace(self.precision.dot(grad)) + t.dot(self.precision.dot(grad).dot(self.precision).dot(t)) for grad in gradients])
             for j in range(iter_max):
                 self.kernel.update_parameters(learning_rate * updates)
-                Gram = self.kernel(X, X)
-                self.covariance = Gram + I / self.beta
+                gram = self.kernel(X, X)
+                self.covariance = gram + eye / self.beta
                 self.precision = np.linalg.inv(self.covariance)
                 log_like = self.log_likelihood()
                 if log_like > log_likelihood_list[-1]:
diff --git a/prml/kernel/polynomial.py b/prml/kernel/polynomial.py
index 9e3bb08ac..71f51064a 100644
--- a/prml/kernel/polynomial.py
+++ b/prml/kernel/polynomial.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.kernel.kernel import Kernel
 
 
diff --git a/prml/kernel/rbf.py b/prml/kernel/rbf.py
index 4115eb3b7..ab1bb7259 100644
--- a/prml/kernel/rbf.py
+++ b/prml/kernel/rbf.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.kernel.kernel import Kernel
 
 
diff --git a/prml/kernel/relevance_vector_classifier.py b/prml/kernel/relevance_vector_classifier.py
index 44534bb00..78e681dfa 100644
--- a/prml/kernel/relevance_vector_classifier.py
+++ b/prml/kernel/relevance_vector_classifier.py
@@ -20,21 +20,21 @@ def __init__(self, kernel, alpha=1.):
     def _sigmoid(self, a):
         return np.tanh(a * 0.5) * 0.5 + 0.5
 
-    def _map_estimate(self, X, t, w, n_iter=10):
+    def _map_estimate(self, x, t, w, n_iter=10):
         for _ in range(n_iter):
-            y = self._sigmoid(X @ w)
-            g = X.T @ (y - t) + self.alpha * w
-            H = (X.T * y * (1 - y)) @ X + np.diag(self.alpha)
+            y = self._sigmoid(x @ w)
+            g = x.T @ (y - t) + self.alpha * w
+            H = (x.T * y * (1 - y)) @ x + np.diag(self.alpha)
             w -= np.linalg.solve(H, g)
         return w, np.linalg.inv(H)
 
-    def fit(self, X, t, iter_max=100):
+    def fit(self, x, t, iter_max=100):
         """
         maximize evidence with respect ot hyperparameter
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input
         t : (sample_size,) ndarray
             corresponding target
@@ -43,7 +43,7 @@ def fit(self, X, t, iter_max=100):
             maximum number of iterations
 
         Attributes
         ----------
-        X : (N, n_features) ndarray
+        x : (N, n_features) ndarray
             relevance vector
         t : (N,) ndarray
             corresponding target
@@ -54,11 +54,11 @@ def fit(self, X, t, iter_max=100):
         mean : (N,) ndarray
             mean of each weight
         """
-        if X.ndim == 1:
-            X = X[:, None]
-        assert X.ndim == 2
+        if x.ndim == 1:
+            x = x[:, None]
+        assert x.ndim == 2
         assert t.ndim == 1
-        Phi = self.kernel(X, X)
+        Phi = self.kernel(x, x)
         N = len(t)
         self.alpha = np.zeros(N) + self.alpha
         mean = np.zeros(N)
@@ -71,20 +71,20 @@ def fit(self, X, t, iter_max=100):
             if np.allclose(param, self.alpha):
                 break
         mask = self.alpha < 1e8
-        self.X = X[mask]
+        self.x = x[mask]
         self.t = t[mask]
         self.alpha = self.alpha[mask]
-        Phi = self.kernel(self.X, self.X)
+        Phi = self.kernel(self.x, self.x)
         mean = mean[mask]
         self.mean, self.covariance = self._map_estimate(Phi, self.t, mean, 100)
 
-    def predict(self, X):
+    def predict(self, x):
         """
         predict class label
 
         Parameters
         ----------
-        X : (sample_size, n_features)
+        x : (sample_size, n_features)
             input
 
         Returns
@@ -92,20 +92,20 @@ def predict(self, X):
         label : (sample_size,) ndarray
             predicted label
         """
-        if X.ndim == 1:
-            X = X[:, None]
-        assert X.ndim == 2
-        phi = self.kernel(X, self.X)
+        if x.ndim == 1:
+            x = x[:, None]
+        assert x.ndim == 2
+        phi = self.kernel(x, self.x)
         label = (phi @ self.mean > 0).astype(int)
         return label
 
-    def predict_proba(self, X):
+    def predict_proba(self, x):
         """
         probability of input belonging class one
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input
 
         Returns
@@ -113,10 +113,10 @@ def predict_proba(self, X):
         proba : (sample_size,) ndarray
             probability of predictive distribution p(C1|x)
         """
-        if X.ndim == 1:
-            X = X[:, None]
-        assert X.ndim == 2
-        phi = self.kernel(X, self.X)
+        if x.ndim == 1:
+            x = x[:, None]
+        assert x.ndim == 2
+        phi = self.kernel(x, self.x)
         mu_a = phi @ self.mean
         var_a = np.sum(phi @ self.covariance * phi, axis=1)
         return self._sigmoid(mu_a / np.sqrt(1 + np.pi * var_a / 8))
diff --git a/prml/markov/__init__.py b/prml/markov/__init__.py
index 6fd3ea780..73d002cec 100644
--- a/prml/markov/__init__.py
+++ b/prml/markov/__init__.py
@@ -1,7 +1,7 @@
-from .categorical_hmm import CategoricalHMM
-from .gaussian_hmm import GaussianHMM
+from prml.markov.categorical_hmm import CategoricalHMM
+from prml.markov.gaussian_hmm import GaussianHMM
 from prml.markov.kalman import Kalman, kalman_filter, kalman_smoother
-from .particle import Particle
+from prml.markov.particle import Particle
 
 
 __all__ = [
diff --git a/prml/markov/categorical_hmm.py b/prml/markov/categorical_hmm.py
index 3f18aef03..efbd46db8 100644
--- a/prml/markov/categorical_hmm.py
+++ b/prml/markov/categorical_hmm.py
@@ -1,5 +1,6 @@
 import numpy as np
-from .hmm import HiddenMarkovModel
+
+from prml.markov.hmm import HiddenMarkovModel
 
 
 class CategoricalHMM(HiddenMarkovModel):
@@ -55,8 +56,8 @@ def draw(self, n=100):
             hidden_state = np.random.choice(self.n_hidden, p=self.transition_proba[hidden_state])
         return np.asarray(seq)
 
-    def likelihood(self, X):
-        return self.means[X]
+    def likelihood(self, x):
+        return self.means[x]
 
     def maximize(self, seq, p_hidden, p_transition):
         self.initial_proba = p_hidden[0] / np.sum(p_hidden[0])
diff --git a/prml/markov/gaussian_hmm.py b/prml/markov/gaussian_hmm.py
index 9cdd60920..88007ad07 100644
--- a/prml/markov/gaussian_hmm.py
+++ b/prml/markov/gaussian_hmm.py
@@ -1,6 +1,7 @@
 import numpy as np
+
+from prml.markov.hmm import HiddenMarkovModel
 from prml.rv import MultivariateGaussian
-from .hmm import HiddenMarkovModel
 
 
 class GaussianHMM(HiddenMarkovModel):
@@ -61,8 +62,8 @@ def draw(self, n=100):
             hidden_state = np.random.choice(self.n_hidden, p=self.transition_proba[hidden_state])
         return np.asarray(seq)
 
-    def likelihood(self, X):
-        diff = X[:, None, :] - self.means
+    def likelihood(self, x):
+        diff = x[:, None, :] - self.means
         exponents = np.sum(
             np.einsum('nki,kij->nkj', diff, self.precisions) * diff, axis=-1)
         return np.exp(-0.5 * exponents) / np.sqrt(np.linalg.det(self.covs) * (2 * np.pi) ** self.ndim)
diff --git a/prml/markov/kalman.py b/prml/markov/kalman.py
index 723a0c887..61d0409fd 100644
--- a/prml/markov/kalman.py
+++ b/prml/markov/kalman.py
@@ -1,11 +1,11 @@
 import numpy as np
-from prml.rv.multivariate_gaussian import MultivariateGaussian as Gaussian
+
 from prml.markov.state_space_model import StateSpaceModel
 
 
 class Kalman(StateSpaceModel):
-    """
-    A class to perform kalman filtering or smoothing\n
+    """A class to perform kalman filtering or smoothing
+
     :math:`z` : internal state (random variable)\n
     :math:`x` : observation (random variable)
 
@@ -40,7 +40,6 @@ class Kalman(StateSpaceModel):
         dimensionality of observed variable
     """
 
-
    def __init__(self, system, cov_system, measure, cov_measure, mu0, P0):
        """
        construct Kalman model
@@ -221,9 +220,8 @@ def fit(self, sequence, max_iter=10):
         return kalman_smoother(self, sequence)
 
 
-def kalman_filter(kalman:Kalman, observed_sequence:np.ndarray)->tuple:
-    """
-    perform kalman filtering given Kalman model and observed sequence
+def kalman_filter(kalman: Kalman, observed_sequence: np.ndarray) -> tuple:
+    """Perform kalman filtering given Kalman model and observed sequence.
 
     Parameters
     ----------
@@ -246,8 +244,7 @@ def kalman_filter(kalman:Kalman, observed_sequence:np.ndarray)->tuple:
 
 
 def kalman_smoother(kalman:Kalman, observed_sequence:np.ndarray=None):
-    """
-    perform Kalman smoothing given Kalman model (and observed sequence)
+    """Perform Kalman smoothing given Kalman model (and observed sequence).
 
     Parameters
     ----------
diff --git a/prml/markov/particle.py b/prml/markov/particle.py
index d4e7c4d90..7fa3436ba 100644
--- a/prml/markov/particle.py
+++ b/prml/markov/particle.py
@@ -1,7 +1,8 @@
 import numpy as np
 from scipy.special import logsumexp
 from scipy.spatial.distance import cdist
-from .state_space_model import StateSpaceModel
+
+from prml.markov.state_space_model import StateSpaceModel
 
 
 class Particle(StateSpaceModel):
diff --git a/prml/markov/state_space_model.py b/prml/markov/state_space_model.py
index b4f55000e..fa0ab778a 100644
--- a/prml/markov/state_space_model.py
+++ b/prml/markov/state_space_model.py
@@ -2,4 +2,5 @@ class StateSpaceModel(object):
     """
     Base class for state-space models
     """
+
     pass
diff --git a/prml/nn/__init__.py b/prml/nn/__init__.py
index 4c26f6e61..d6e2a059d 100755
--- a/prml/nn/__init__.py
+++ b/prml/nn/__init__.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
 from prml.nn.config import config
 from prml.nn.network import Network
 from prml.nn import array
diff --git a/prml/nn/array/__init__.py b/prml/nn/array/__init__.py
index 0061bf265..4f62fad73 100755
--- a/prml/nn/array/__init__.py
+++ b/prml/nn/array/__init__.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
 from prml.nn.array.array import Array, array, asarray
 from prml.nn.array.reshape import reshape_method
 from prml.nn.function import broadcast, broadcast_to
diff --git a/prml/nn/array/array.py b/prml/nn/array/array.py
index fe90605d6..d2f96a53c 100755
--- a/prml/nn/array/array.py
+++ b/prml/nn/array/array.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.config import config
 from prml.nn.queue import backprop_queue
 
diff --git a/prml/nn/array/broadcast.py b/prml/nn/array/broadcast.py
index b08e757cd..da04fcf03 100755
--- a/prml/nn/array/broadcast.py
+++ b/prml/nn/array/broadcast.py
@@ -1,2 +1 @@
-import numpy as np
-from prml.nn.function import Function, broadcast, broadcast_to
+from prml.nn.function import Function, broadcast, broadcast_to  # noqa
diff --git a/prml/nn/array/ones.py b/prml/nn/array/ones.py
index 78fc1adcc..44e1c605c 100755
--- a/prml/nn/array/ones.py
+++ b/prml/nn/array/ones.py
@@ -1,6 +1,7 @@
+import numpy as np
+
 from prml.nn.array.array import Array
 from prml.nn.config import config
-import numpy as np
 
 
 def ones(size):
diff --git a/prml/nn/array/zeros.py b/prml/nn/array/zeros.py
index a793e8a45..4a3a1da5c 100755
--- a/prml/nn/array/zeros.py
+++ b/prml/nn/array/zeros.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.array.array import Array
 from prml.nn.config import config
 
diff --git a/prml/nn/distribution/bernoulli.py b/prml/nn/distribution/bernoulli.py
index 96b03f5cd..787900f7b 100755
--- a/prml/nn/distribution/bernoulli.py
+++ b/prml/nn/distribution/bernoulli.py
@@ -1,10 +1,11 @@
 import numpy as np
+
 from prml.nn.array.array import asarray
-from prml.nn.math.log import log
-from prml.nn.nonlinear.sigmoid import sigmoid
-from prml.nn.nonlinear.logit import logit as logit_func
 from prml.nn.distribution.distribution import Distribution
 from prml.nn.loss.sigmoid_cross_entropy import sigmoid_cross_entropy
+from prml.nn.math.log import log
+from prml.nn.nonlinear.logit import logit as logit_func
+from prml.nn.nonlinear.sigmoid import sigmoid
 
 
 class Bernoulli(Distribution):
diff --git a/prml/nn/distribution/categorical.py b/prml/nn/distribution/categorical.py
index 39dd194c3..9a79917c4 100755
--- a/prml/nn/distribution/categorical.py
+++ b/prml/nn/distribution/categorical.py
@@ -1,20 +1,31 @@
 import numpy as np
+
 from prml.nn.array.array import asarray
+from prml.nn.distribution.distribution import Distribution
 from prml.nn.function import Function
+from prml.nn.loss.softmax_cross_entropy import softmax_cross_entropy
 from prml.nn.math.log import log
 from prml.nn.nonlinear.softmax import softmax
-from prml.nn.distribution.distribution import Distribution
-from prml.nn.loss.softmax_cross_entropy import softmax_cross_entropy
 
 
 class Categorical(Distribution):
+    """Categorical distribution."""
+
     is_categorical = True
 
-    def __init__(self, mean=None, logit=None, use_gumbel_softmax=True, tau=0.1):
+    def __init__(
+        self,
+        mean=None,
+        logit=None,
+        use_gumbel_softmax=True,
+        tau=0.1,
+    ):
+        """Initialize a categorical distribution."""
         super().__init__()
         if mean is not None:
             self.mean = asarray(mean)
-            assert((self.mean.value >= 0).all() and np.allclose(self.mean.value.sum(axis=-1), 1))
+            v = self.mean.value
+            assert ((v >= 0).all() and np.allclose(v.sum(axis=-1), 1))
             self.logit = log(self.mean)
             self._log_pdf = self._log_pdf_mean
         elif logit is not None:
@@ -56,6 +67,7 @@ def _log_pdf_logit(self, x):
 
 
 class CategoricalPDF(Function):
+    """Probability density function of a categorical distribution."""
 
     def _forward(self, mean, x):
         proba = np.ones_like(mean)
diff --git a/prml/nn/function.py b/prml/nn/function.py
index 0efb96f1b..e8c3e281c 100755
--- a/prml/nn/function.py
+++ b/prml/nn/function.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.array.array import Array, asarray
 from prml.nn.config import config
 from prml.nn.queue import backprop_queue
diff --git a/prml/nn/image/__init__.py b/prml/nn/image/__init__.py
index 48be8ac13..b6f4f451a 100755
--- a/prml/nn/image/__init__.py
+++ b/prml/nn/image/__init__.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
 from prml.nn.image.convolve2d import convolve2d, Convolve2d
 from prml.nn.image.deconvolve2d import deconvolve2d, Deconvolve2d
 from prml.nn.image.max_pooling2d import max_pooling2d
diff --git a/prml/nn/image/convolve2d.py b/prml/nn/image/convolve2d.py
index e88cbaf3d..a43e6c9c1 100755
--- a/prml/nn/image/convolve2d.py
+++ b/prml/nn/image/convolve2d.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.array.array import Array
 from prml.nn.network import Network
 from prml.nn.function import Function
diff --git a/prml/nn/image/deconvolve2d.py b/prml/nn/image/deconvolve2d.py
index 8431a96dd..197982c94 100755
--- a/prml/nn/image/deconvolve2d.py
+++ b/prml/nn/image/deconvolve2d.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.array.array import Array
 from prml.nn.function import Function
 from prml.nn.network import Network
@@ -30,8 +31,10 @@ def __init__(self, kernel_size, out_ch, stride, pad, shape):
 
     def _forward(self, x, y):
         if self.shape is None:
-            shape = (len(x),) + tuple(s * (imlen - 1) + klen
-                for s, imlen, klen in zip(self.stride, x.shape[1:], self.kernel_size)) + (self.out_ch,)
+            shape = (len(x),) + tuple(
+                s * (imlen - 1) + klen for s, imlen, klen
+                in zip(self.stride, x.shape[1:], self.kernel_size)
+            ) + (self.out_ch,)
         else:
             shape = (len(x),) + self.shape + (self.out_ch,)
         patch_flat = np.matmul(x, y.T)  # (N, Hx, Wx, kx * ky * out_ch)
diff --git a/prml/nn/image/max_pooling2d.py b/prml/nn/image/max_pooling2d.py
index 0d76d8e0e..861c78d11 100755
--- a/prml/nn/image/max_pooling2d.py
+++ b/prml/nn/image/max_pooling2d.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.config import config
 from prml.nn.function import Function
 from prml.nn.image.util import img2patch, patch2img, patch2img_no_overlap
diff --git a/prml/nn/image/util.py b/prml/nn/image/util.py
index ba5db8b1e..0ef085352 100755
--- a/prml/nn/image/util.py
+++ b/prml/nn/image/util.py
@@ -1,11 +1,12 @@
 import itertools
+
 import numpy as np
 from numpy.lib.stride_tricks import as_strided
 
 
 def img2patch(img, size, step=1):
-    """
-    convert batch of image array into patches
+    """Convert batch of image array into patches.
+
     Parameters
     ----------
     img : (n_batch, xlen_in, ylen_in, in_chaprml.nnels) ndarray
@@ -39,8 +40,8 @@ def img2patch(img, size, step=1):
 
 
 def _patch2img(x, stride, shape):
-    """
-    sum up patches and form an image
+    """Sum up patches and form an image.
+
     Parameters
     ----------
     x : (n_batch, xlen_in, ylen_in, kx, ky, in_chaprml.nnels) ndarray
@@ -57,12 +58,16 @@ def _patch2img(x, stride, shape):
     img = np.zeros(shape, dtype=x.dtype)
     kx, ky = x.shape[3: 5]
     for i, j in itertools.product(range(kx), range(ky)):
-        slices = tuple(slice(b, b + s * len_, s) for b, s, len_ in zip([i, j], stride, x.shape[1: 3]))
+        slices = tuple(
+            slice(b, b + s * len_, s)
+            for b, s, len_ in zip([i, j], stride, x.shape[1: 3])
+        )
         img[(slice(None),) + slices] += x[..., i, j, :]
     return img
 
 
 def patch2img(x, stride, shape):
+    """Transform patches to an image."""
     img = np.zeros(shape, dtype=x.dtype)
     kx, ky = x.shape[3:5]
     patch = img2patch(img, (kx, ky), stride)
@@ -72,6 +77,7 @@ def patch2img(x, stride, shape):
 
 
 def patch2img_no_overlap(x, stride, shape):
+    """Transform patches to an image without overlaps."""
     img = np.zeros(shape, dtype=x.dtype)
     patch = img2patch(img, x.shape[3:5], stride)
     patch += x
diff --git a/prml/nn/io/__init__.py b/prml/nn/io/__init__.py
index de353b927..173436482 100755
--- a/prml/nn/io/__init__.py
+++ b/prml/nn/io/__init__.py
@@ -1 +1,3 @@
+# flake8: noqa
+
 from prml.nn.io.io import save_parameter, load_parameter, save_object, load_object
diff --git a/prml/nn/io/io.py b/prml/nn/io/io.py
index 3eb038841..528353ddf 100755
--- a/prml/nn/io/io.py
+++ b/prml/nn/io/io.py
@@ -14,10 +14,10 @@ def load_parameter(filename: str, parameter: dict):
 
 
 def save_object(filename: str, obj):
-    with open(filename, "wb") as file:
-        pickle.dump(obj, file)
+    with open(filename, "wb") as f:
+        pickle.dump(obj, f)
 
 
 def load_object(filename: str, obj):
-    with open(filename, "rb") as file:
-        return pickle.load(filename)
+    with open(filename, "rb") as f:
+        return pickle.load(f)
diff --git a/prml/nn/loss/__init__.py b/prml/nn/loss/__init__.py
index bf16dcd07..7ede1651d 100755
--- a/prml/nn/loss/__init__.py
+++ b/prml/nn/loss/__init__.py
@@ -1,3 +1,12 @@
 from prml.nn.loss.kl import kl_divergence
 from prml.nn.loss.sigmoid_cross_entropy import sigmoid_cross_entropy
 from prml.nn.loss.softmax_cross_entropy import softmax_cross_entropy
+
+
+_functions = [kl_divergence, sigmoid_cross_entropy, softmax_cross_entropy]
+
+
+__all__ = [_f.__name__ for _f in _functions]
+
+
+del _functions
diff --git a/prml/nn/loss/kl.py b/prml/nn/loss/kl.py
index 12a746509..86da113e5 100755
--- a/prml/nn/loss/kl.py
+++ b/prml/nn/loss/kl.py
@@ -1,5 +1,3 @@
-import numpy as np
-from prml.nn.function import Function
 from prml.nn.distribution.bernoulli import Bernoulli
 from prml.nn.distribution.categorical import Categorical
 from prml.nn.distribution.gaussian import Gaussian
@@ -34,8 +32,10 @@ def kl_divergence(q, p, data=None):
 
 
 def kl_bernoulli(q, p):
-    return (q.mean - 1) * (q.logit - p.logit) \
+    return (
+        (q.mean - 1) * (q.logit - p.logit)
         - softplus(-q.logit) + softplus(p.logit)
+    )
 
 
 def kl_categorical(q, p):
diff --git a/prml/nn/loss/sigmoid_cross_entropy.py b/prml/nn/loss/sigmoid_cross_entropy.py
index c70589b27..6fda1a93c 100755
--- a/prml/nn/loss/sigmoid_cross_entropy.py
+++ b/prml/nn/loss/sigmoid_cross_entropy.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/loss/softmax_cross_entropy.py b/prml/nn/loss/softmax_cross_entropy.py
index 59616b9a6..baac12f6e 100755
--- a/prml/nn/loss/softmax_cross_entropy.py
+++ b/prml/nn/loss/softmax_cross_entropy.py
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy.special import logsumexp
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/add.py b/prml/nn/math/add.py
index 69ef4e30e..d184fbf3f 100755
--- a/prml/nn/math/add.py
+++ b/prml/nn/math/add.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/exp.py b/prml/nn/math/exp.py
index 28878cd28..51a9f3faa 100755
--- a/prml/nn/math/exp.py
+++ b/prml/nn/math/exp.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/log.py b/prml/nn/math/log.py
index 4a6ec358c..b47a689e3 100755
--- a/prml/nn/math/log.py
+++ b/prml/nn/math/log.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/multiply.py b/prml/nn/math/multiply.py
index 5a53839ad..755506fea 100755
--- a/prml/nn/math/multiply.py
+++ b/prml/nn/math/multiply.py
@@ -1,4 +1,3 @@
-import numpy as np
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/power.py b/prml/nn/math/power.py
index b84c073d9..589b0037d 100755
--- a/prml/nn/math/power.py
+++ b/prml/nn/math/power.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/product.py b/prml/nn/math/product.py
index d9448f699..2379fb4c9 100755
--- a/prml/nn/math/product.py
+++ b/prml/nn/math/product.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/sqrt.py b/prml/nn/math/sqrt.py
index 9938c1709..8697aba69 100755
--- a/prml/nn/math/sqrt.py
+++ b/prml/nn/math/sqrt.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/square.py b/prml/nn/math/square.py
index 09af3d425..e10ed0af8 100755
--- a/prml/nn/math/square.py
+++ b/prml/nn/math/square.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/math/subtract.py b/prml/nn/math/subtract.py
index 8ade19c9f..5c88d9b71 100755
--- a/prml/nn/math/subtract.py
+++ b/prml/nn/math/subtract.py
@@ -1,8 +1,9 @@
-import numpy as np
 from prml.nn.function import Function
 
 
 class Subtract(Function):
+    """Subtraction function."""
+
     enable_auto_broadcast = True
 
     @staticmethod
@@ -15,8 +16,10 @@ def _backward(delta, x, y):
 
 
 def subtract(x, y):
+    """Subtract."""
     return Subtract().forward(x, y)
 
 
 def rsubtract(x, y):
+    """Reverse subtract."""
     return Subtract().forward(y, x)
diff --git a/prml/nn/math/sum.py b/prml/nn/math/sum.py
index fda2da2ee..a027046fb 100755
--- a/prml/nn/math/sum.py
+++ b/prml/nn/math/sum.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/nonlinear/log_softmax.py b/prml/nn/nonlinear/log_softmax.py
index 01757a752..1d44fd0fc 100755
--- a/prml/nn/nonlinear/log_softmax.py
+++ b/prml/nn/nonlinear/log_softmax.py
@@ -1,9 +1,11 @@
 import numpy as np
 from scipy.special import logsumexp
+
 from prml.nn.function import Function
 
 
 class LogSoftmax(Function):
+    """Log-softmax function."""
 
     def _forward(self, x):
         self.output = x - logsumexp(x, axis=-1, keepdims=True)
@@ -16,4 +18,5 @@ def _backward(self, delta, x):
 
 
 def log_softmax(x):
+    """Return log-softmax transformation of the input."""
     return LogSoftmax().forward(x)
diff --git a/prml/nn/nonlinear/logit.py b/prml/nn/nonlinear/logit.py
index d04704629..973eb0cbb 100755
--- a/prml/nn/nonlinear/logit.py
+++ b/prml/nn/nonlinear/logit.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/nonlinear/sigmoid.py b/prml/nn/nonlinear/sigmoid.py
index 1f60b8a95..db7c1c4c4 100755
--- a/prml/nn/nonlinear/sigmoid.py
+++ b/prml/nn/nonlinear/sigmoid.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/nonlinear/softplus.py b/prml/nn/nonlinear/softplus.py
index 38a29a372..f5f03ecdc 100755
--- a/prml/nn/nonlinear/softplus.py
+++ b/prml/nn/nonlinear/softplus.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/nonlinear/tanh.py b/prml/nn/nonlinear/tanh.py
index 3ee63e5f2..061caa4a9 100755
--- a/prml/nn/nonlinear/tanh.py
+++ b/prml/nn/nonlinear/tanh.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/normalization/__init__.py b/prml/nn/normalization/__init__.py
index 43fef92e1..3f67d6f75 100755
--- a/prml/nn/normalization/__init__.py
+++ b/prml/nn/normalization/__init__.py
@@ -1 +1,4 @@
 from prml.nn.normalization.batch_normalization import BatchNormalization
+
+
+__all__ = ['BatchNormalization']
diff --git a/prml/nn/normalization/batch_normalization.py b/prml/nn/normalization/batch_normalization.py
index 4542cfa6c..17f72c6ee 100755
--- a/prml/nn/normalization/batch_normalization.py
+++ b/prml/nn/normalization/batch_normalization.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.array.ones import ones
 from prml.nn.array.zeros import zeros
 from prml.nn.config import config
diff --git a/prml/nn/optimizer/__init__.py b/prml/nn/optimizer/__init__.py
index 77632f993..d6dabdd87 100755
--- a/prml/nn/optimizer/__init__.py
+++ b/prml/nn/optimizer/__init__.py
@@ -1,3 +1,5 @@
+# flake8: noqa
+
 from prml.nn.optimizer.ada_delta import AdaDelta
 from prml.nn.optimizer.ada_grad import AdaGrad
 from prml.nn.optimizer.adam import Adam
diff --git a/prml/nn/optimizer/ada_delta.py b/prml/nn/optimizer/ada_delta.py
index 314ede339..387c977f4 100644
--- a/prml/nn/optimizer/ada_delta.py
+++ b/prml/nn/optimizer/ada_delta.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.config import config
 from prml.nn.optimizer.optimizer import Optimizer
 
diff --git a/prml/nn/optimizer/ada_grad.py b/prml/nn/optimizer/ada_grad.py
index e98442cf7..bb1ee5f0f 100644
--- a/prml/nn/optimizer/ada_grad.py
+++ b/prml/nn/optimizer/ada_grad.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.config import config
 from prml.nn.optimizer.optimizer import Optimizer
 
@@ -27,7 +28,7 @@ def update(self):
         for key in self.parameter:
             param, G = self.parameter[key], self.G[key]
             if param.grad is None:
-                    continue
+                continue
             grad = param.grad
             G += grad ** 2
             param.value += self.learning_rate * grad / (np.sqrt(G) + self.epsilon)
diff --git a/prml/nn/optimizer/adam.py b/prml/nn/optimizer/adam.py
index 7d2d6414f..63280178d 100755
--- a/prml/nn/optimizer/adam.py
+++ b/prml/nn/optimizer/adam.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.config import config
 from prml.nn.optimizer.optimizer import Optimizer
 
diff --git a/prml/nn/optimizer/momentum.py b/prml/nn/optimizer/momentum.py
index cb50e18c9..9b672f395 100644
--- a/prml/nn/optimizer/momentum.py
+++ b/prml/nn/optimizer/momentum.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.optimizer.optimizer import Optimizer
 
 
diff --git a/prml/nn/optimizer/optimizer.py b/prml/nn/optimizer/optimizer.py
index 784f6d241..42ee6167d 100755
--- a/prml/nn/optimizer/optimizer.py
+++ b/prml/nn/optimizer/optimizer.py
@@ -2,7 +2,7 @@ class Optimizer(object):
 
     def __init__(self, parameter: dict, learning_rate: float):
         if isinstance(parameter, list):
-            self.parameter = {f"parameter{i}" : param for i, param in enumerate(parameter)}
+            self.parameter = {f"parameter{i}": param for i, param in enumerate(parameter)}
         elif isinstance(parameter, dict):
             self.parameter = parameter
         self.learning_rate = learning_rate
diff --git a/prml/nn/optimizer/rmsprop.py b/prml/nn/optimizer/rmsprop.py
index 5271ba36e..2e125fab3 100644
--- a/prml/nn/optimizer/rmsprop.py
+++ b/prml/nn/optimizer/rmsprop.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.optimizer.optimizer import Optimizer
 
 
diff --git a/prml/nn/random/__init__.py b/prml/nn/random/__init__.py
index c961637ac..494f18319 100755
--- a/prml/nn/random/__init__.py
+++ b/prml/nn/random/__init__.py
@@ -1,3 +1,11 @@
 from prml.nn.random.dropout import dropout
 from prml.nn.random.normal import normal, truncnormal
 from prml.nn.random.uniform import uniform
+
+_functions = [dropout, normal, truncnormal, uniform]
+
+
+__all__ = [_f.__name__ for _f in _functions]
+
+
+del _functions
diff --git a/prml/nn/random/dropout.py b/prml/nn/random/dropout.py
index 536884563..62eab2094 100755
--- a/prml/nn/random/dropout.py
+++ b/prml/nn/random/dropout.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.nn.function import Function
 
 
diff --git a/prml/nn/random/normal.py b/prml/nn/random/normal.py
index 981e239cb..ee40dc674 100755
--- a/prml/nn/random/normal.py
+++ b/prml/nn/random/normal.py
@@ -1,11 +1,14 @@
 import numpy as np
 from scipy.stats import truncnorm
+
 from prml.nn.array.array import asarray
 
 
 def normal(mean, std, size):
+    """Return a random sample from normal distribution."""
     return asarray(np.random.normal(mean, std, size))
 
 
-def truncnormal(min, max, scale, size):
-    return asarray(truncnorm(a=min, b=max, scale=scale).rvs(size))
+def truncnormal(min_, max_, scale, size):
+    """Return a random sample from trunc-normal distribution."""
+    return asarray(truncnorm(a=min_, b=max_, scale=scale).rvs(size))
diff --git a/prml/nn/random/uniform.py b/prml/nn/random/uniform.py
index 2c6396740..4f9b2905d 100755
--- a/prml/nn/random/uniform.py
+++ b/prml/nn/random/uniform.py
@@ -1,6 +1,7 @@
 import numpy as np
+
 from prml.nn.array.array import asarray
 
 
-def uniform(min, max, size):
-    return asarray(np.random.uniform(min, max, size))
+def uniform(min_, max_, size):
+    return asarray(np.random.uniform(min_, max_, size))
diff --git a/prml/preprocess/sigmoidal.py b/prml/preprocess/sigmoidal.py
index 0d926ff0c..bb3af47d4 100644
--- a/prml/preprocess/sigmoidal.py
+++ b/prml/preprocess/sigmoidal.py
@@ -2,15 +2,13 @@
 
 
 class SigmoidalFeature(object):
-    """
-    Sigmoidal features
+    """Sigmoidal features.
 
     1 / (1 + exp((m - x) @ c)
     """
 
     def __init__(self, mean, coef=1):
-        """
-        construct sigmoidal features
+        """Initialize sigmoidal features.
 
         Parameters
         ----------
@@ -38,8 +36,7 @@ def _sigmoid(self, x, mean):
         return np.tanh((x - mean) @ self.coef * 0.5) * 0.5 + 0.5
 
     def transform(self, x):
-        """
-        transform input array with sigmoidal features
+        """Transform input array with sigmoidal features.
 
         Parameters
         ----------
diff --git a/prml/rv/bernoulli.py b/prml/rv/bernoulli.py
index 489e21b30..80f35e1cf 100644
--- a/prml/rv/bernoulli.py
+++ b/prml/rv/bernoulli.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.rv.rv import RandomVariable
 from prml.rv.beta import Beta
 
@@ -63,50 +64,48 @@ def shape(self):
         else:
             return None
 
-    def _fit(self, X):
+    def _fit(self, x):
         if isinstance(self.mu, Beta):
-            self._bayes(X)
+            self._bayes(x)
         elif isinstance(self.mu, RandomVariable):
             raise NotImplementedError
         else:
-            self._ml(X)
+            self._ml(x)
 
-    def _ml(self, X):
-        n_zeros = np.count_nonzero((X == 0).astype(int))
-        n_ones = np.count_nonzero((X == 1).astype(int))
-        assert X.size == n_zeros + n_ones, (
-            "{X.size} is not equal to {n_zeros} plus {n_ones}"
+    def _ml(self, x):
+        n_zeros = np.count_nonzero((x == 0).astype(int))
+        n_ones = np.count_nonzero((x == 1).astype(int))
+        assert x.size == n_zeros + n_ones, (
+            f"{x.size} is not equal to {n_zeros} plus {n_ones}"
         )
-        self.mu = np.mean(X, axis=0)
+        self.mu = np.mean(x, axis=0)
 
-    def _map(self, X):
+    def _map(self, x):
         assert isinstance(self.mu, Beta)
-        assert X.shape[1:] == self.mu.shape
-        n_ones = (X == 1).sum(axis=0)
-        n_zeros = (X == 0).sum(axis=0)
-        assert X.size == n_zeros.sum() + n_ones.sum(), (
-            f"{X.size} is not equal to {n_zeros} plus {n_ones}"
+        assert x.shape[1:] == self.mu.shape
+        n_ones = (x == 1).sum(axis=0)
+        n_zeros = (x == 0).sum(axis=0)
+        assert x.size == n_zeros.sum() + n_ones.sum(), (
+            f"{x.size} is not equal to {n_zeros} plus {n_ones}"
         )
         n_ones = n_ones + self.mu.n_ones
         n_zeros = n_zeros + self.mu.n_zeros
         self.prob = (n_ones - 1) / (n_ones + n_zeros - 2)
 
-    def _bayes(self, X):
+    def _bayes(self, x):
         assert isinstance(self.mu, Beta)
-        assert X.shape[1:] == self.mu.shape
-        n_ones = (X == 1).sum(axis=0)
-        n_zeros = (X == 0).sum(axis=0)
-        assert X.size == n_zeros.sum() + n_ones.sum(), (
-            "input X must only has 0 or 1"
+        assert x.shape[1:] == self.mu.shape
+        n_ones = (x == 1).sum(axis=0)
+        n_zeros = (x == 0).sum(axis=0)
+        assert x.size == n_zeros.sum() + n_ones.sum(), (
+            "input x must only have 0 or 1"
         )
         self.mu.n_zeros += n_zeros
         self.mu.n_ones += n_ones
 
-    def _pdf(self, X):
-        assert isinstance(mu, np.ndarray)
-        return np.prod(
-            self.mu ** X * (1 - self.mu) ** (1 - X)
-        )
+    def _pdf(self, x):
+        assert isinstance(self.mu, np.ndarray)
+        return np.prod(self.mu ** x * (1 - self.mu) ** (1 - x))
 
     def _draw(self, sample_size=1):
         if isinstance(self.mu, np.ndarray):
diff --git a/prml/rv/bernoulli_mixture.py b/prml/rv/bernoulli_mixture.py
index 0c6023488..7cc1f736b 100644
--- a/prml/rv/bernoulli_mixture.py
+++ b/prml/rv/bernoulli_mixture.py
@@ -58,44 +58,44 @@ def coef(self, coef):
             assert coef is None
             self.parameter["coef"] = np.ones(self.n_components) / self.n_components
 
-    def _log_bernoulli(self, X):
+    def _log_bernoulli(self, x):
         np.clip(self.mu, 1e-10, 1 - 1e-10, out=self.mu)
         return (
-            X[:, None, :] * np.log(self.mu)
-            + (1 - X[:, None, :]) * np.log(1 - self.mu)
+            x[:, None, :] * np.log(self.mu)
+            + (1 - x[:, None, :]) * np.log(1 - self.mu)
         ).sum(axis=-1)
 
-    def _fit(self, X):
-        self.mu = np.random.uniform(0.25, 0.75, size=(self.n_components, np.size(X, 1)))
+    def _fit(self, x):
+        self.mu = np.random.uniform(0.25, 0.75, size=(self.n_components, np.size(x, 1)))
         params = np.hstack((self.mu.ravel(), self.coef.ravel()))
         while True:
-            resp = self._expectation(X)
-            self._maximization(X, resp)
+            resp = self._expectation(x)
+            self._maximization(x, resp)
             new_params = np.hstack((self.mu.ravel(), self.coef.ravel()))
             if np.allclose(params, new_params):
                 break
             else:
                 params = new_params
 
-    def _expectation(self, X):
-        log_resps = np.log(self.coef) + self._log_bernoulli(X)
+    def _expectation(self, x):
+        log_resps = np.log(self.coef) + self._log_bernoulli(x)
         log_resps -= logsumexp(log_resps, axis=-1)[:, None]
         resps = np.exp(log_resps)
         return resps
 
-    def _maximization(self, X, resp):
+    def _maximization(self, x, resp):
         Nk = np.sum(resp, axis=0)
-        self.coef = Nk / len(X)
-        self.mu = (X.T @ resp / Nk).T
+        self.coef = Nk / len(x)
+        self.mu = (x.T @ resp / Nk).T
 
-    def classify(self, X):
+    def classify(self, x):
         """
         classify input
         max_z p(z|x, theta)
 
         Parameters
         ----------
-        X : (sample_size, ndim) ndarray
+        x : (sample_size, ndim) ndarray
             input
 
         Returns
@@ -103,16 +103,16 @@ def classify(self, X):
         output : (sample_size,) ndarray
             corresponding cluster index
         """
-        return np.argmax(self.classify_proba(X), axis=1)
+        return np.argmax(self.classify_proba(x), axis=1)
 
-    def classfiy_proba(self, X):
+    def classify_proba(self, x):
         """
         posterior probability of cluster
         p(z|x,theta)
 
         Parameters
         ----------
-        X : (sample_size, ndim) ndarray
+        x : (sample_size, ndim) ndarray
             input
 
         Returns
@@ -120,4 +120,4 @@ def classfiy_proba(self, X):
         output : (sample_size, n_components) ndarray
             posterior probability of cluster
         """
-        return self._expectation(X)
+        return self._expectation(x)
diff --git a/prml/rv/categorical.py b/prml/rv/categorical.py
index 81a47aca3..3ce8c3997 100644
--- a/prml/rv/categorical.py
+++ b/prml/rv/categorical.py
@@ -1,6 +1,7 @@
 import numpy as np
-from prml.rv.rv import RandomVariable
+
 from prml.rv.dirichlet import Dirichlet
+from prml.rv.rv import RandomVariable
 
 
 class Categorical(RandomVariable):
@@ -65,38 +66,38 @@ def shape(self):
         else:
             return None
 
-    def _check_input(self, X):
-        assert X.ndim == 2
-        assert (X >= 0).all()
-        assert (X.sum(axis=-1) == 1).all()
+    def _check_input(self, x):
+        assert x.ndim == 2
+        assert (x >= 0).all()
+        assert (x.sum(axis=-1) == 1).all()
 
-    def _fit(self, X):
+    def _fit(self, x):
         if isinstance(self.mu, Dirichlet):
-            self._bayes(X)
+            self._bayes(x)
         elif isinstance(self.mu, RandomVariable):
             raise NotImplementedError
         else:
-            self._ml(X)
+            self._ml(x)
 
-    def _ml(self, X):
-        self._check_input(X)
-        self.mu = np.mean(X, axis=0)
+    def _ml(self, x):
+        self._check_input(x)
+        self.mu = np.mean(x, axis=0)
 
-    def _map(self, X):
-        self._check_input(X)
+    def _map(self, x):
+        self._check_input(x)
         assert isinstance(self.mu, Dirichlet)
-        alpha = self.mu.alpha + X.sum(axis=0)
+        alpha = self.mu.alpha + x.sum(axis=0)
         self.mu = (alpha - 1) / (alpha - 1).sum()
 
-    def _bayes(self, X):
-        self._check_input(X)
+    def _bayes(self, x):
+        self._check_input(x)
         assert isinstance(self.mu, Dirichlet)
-        self.mu.alpha += X.sum(axis=0)
+        self.mu.alpha += x.sum(axis=0)
 
-    def _pdf(self, X):
-        self._check_input(X)
+    def _pdf(self, x):
+        self._check_input(x)
         assert isinstance(self.mu, np.ndarray)
-        return np.prod(self.mu ** X, axis=-1)
+        return np.prod(self.mu ** x, axis=-1)
 
     def _draw(self, sample_size=1):
         assert isinstance(self.mu, np.ndarray)
diff --git a/prml/rv/gamma.py b/prml/rv/gamma.py
index 9898b0aab..5efec78b1 100644
--- a/prml/rv/gamma.py
+++ b/prml/rv/gamma.py
@@ -81,11 +81,11 @@ def shape(self):
     def size(self):
         return self.a.size
 
-    def _pdf(self, X):
+    def _pdf(self, x):
         return (
             self.b ** self.a
-            * X ** (self.a - 1)
-            * np.exp(-self.b * X)
+            * x ** (self.a - 1)
+            * np.exp(-self.b * x)
             / gamma(self.a))
 
     def _draw(self, sample_size=1):
diff --git a/prml/rv/gaussian.py b/prml/rv/gaussian.py
index d4001691e..c487df94d 100644
--- a/prml/rv/gaussian.py
+++ b/prml/rv/gaussian.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.rv.rv import RandomVariable
 from prml.rv.gamma import Gamma
 
@@ -107,61 +108,61 @@ def shape(self):
         else:
             return None
 
-    def _fit(self, X):
+    def _fit(self, x):
         mu_is_gaussian = isinstance(self.mu, Gaussian)
         tau_is_gamma = isinstance(self.tau, Gamma)
         if mu_is_gaussian and tau_is_gamma:
             raise NotImplementedError
         elif mu_is_gaussian:
-            self._bayes_mu(X)
+            self._bayes_mu(x)
         elif tau_is_gamma:
-            self._bayes_tau(X)
+            self._bayes_tau(x)
         else:
-            self._ml(X)
+            self._ml(x)
 
-    def _ml(self, X):
-        self.mu = np.mean(X, axis=0)
-        self.var = np.var(X, axis=0)
+    def _ml(self, x):
+        self.mu = np.mean(x, axis=0)
+        self.var = np.var(x, axis=0)
 
-    def _map(self, X):
+    def _map(self, x):
         assert isinstance(self.mu, Gaussian)
         assert isinstance(self.var, np.ndarray)
-        N = len(X)
-        mu = np.mean(X, 0)
+        N = len(x)
+        mu = np.mean(x, 0)
         self.mu = (
             (self.tau * self.mu.mu + N * self.mu.tau * mu)
             / (N * self.mu.tau + self.tau)
         )
 
-    def _bayes_mu(self, X):
-        N = len(X)
-        mu = np.mean(X, 0)
+    def _bayes_mu(self, x):
+        N = len(x)
+        mu = np.mean(x, 0)
         tau = self.mu.tau + N * self.tau
         self.mu = Gaussian(
             mu=(self.mu.mu * self.mu.tau + N * mu * self.tau) / tau,
             tau=tau
         )
 
-    def _bayes_tau(self, X):
-        N = len(X)
-        var = np.var(X, axis=0)
+    def _bayes_tau(self, x):
+        N = len(x)
+        var = np.var(x, axis=0)
         a = self.tau.a + 0.5 * N
         b = self.tau.b + 0.5 * N * var
         self.tau = Gamma(a, b)
 
-    def _bayes(self, X):
-        N = len(X)
+    def _bayes(self, x):
+        N = len(x)
         mu_is_gaussian = isinstance(self.mu, Gaussian)
         tau_is_gamma = isinstance(self.tau, Gamma)
         if mu_is_gaussian and not tau_is_gamma:
-            mu = np.mean(X, 0)
+            mu = np.mean(x, 0)
             tau = self.mu.tau + N * self.tau
             self.mu = Gaussian(
                 mu=(self.mu.mu * self.mu.tau + N * mu * self.tau) / tau,
                 tau=tau
             )
         elif not mu_is_gaussian and tau_is_gamma:
-            var = np.var(X, axis=0)
+            var = np.var(x, axis=0)
             a = self.tau.a + 0.5 * N
             b = self.tau.b + 0.5 * N * var
             self.tau = Gamma(a, b)
@@ -170,8 +171,8 @@ def _bayes(self, X):
         else:
             raise NotImplementedError
 
-    def _pdf(self, X):
-        d = X - self.mu
+    def _pdf(self, x):
+        d = x - self.mu
         return (
             np.exp(-0.5 * self.tau * d ** 2) / np.sqrt(2 * np.pi * self.var)
         )
diff --git a/prml/rv/multivariate_gaussian.py b/prml/rv/multivariate_gaussian.py
index 0d55bddec..8926366af 100644
--- a/prml/rv/multivariate_gaussian.py
+++ b/prml/rv/multivariate_gaussian.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.rv.rv import RandomVariable
 
 
@@ -85,12 +86,12 @@ def shape(self):
         else:
             return None
 
-    def _fit(self, X):
-        self.mu = np.mean(X, axis=0)
-        self.cov = np.atleast_2d(np.cov(X.T, bias=True))
+    def _fit(self, x):
+        self.mu = np.mean(x, axis=0)
+        self.cov = np.atleast_2d(np.cov(x.T, bias=True))
 
-    def _pdf(self, X):
-        d = X - self.mu
+    def _pdf(self, x):
+        d = x - self.mu
         return (
             np.exp(-0.5 * np.sum(d @ self.tau * d, axis=-1))
             * np.sqrt(np.linalg.det(self.tau))
diff --git a/prml/rv/multivariate_gaussian_mixture.py b/prml/rv/multivariate_gaussian_mixture.py
index 6b70dbb8d..f457c4255 100644
--- a/prml/rv/multivariate_gaussian_mixture.py
+++ b/prml/rv/multivariate_gaussian_mixture.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.clustering import KMeans
 from prml.rv.rv import RandomVariable
 
@@ -119,20 +120,18 @@ def shape(self):
         else:
             return None
 
-    def _gauss(self, X):
-        d = X[:, None, :] - self.mu
+    def _gauss(self, x):
+        d = x[:, None, :] - self.mu
         D_sq = np.sum(np.einsum('nki,kij->nkj', d, self.cov) * d, -1)
         return (
             np.exp(-0.5 * D_sq)
-            / np.sqrt(
-                np.linalg.det(self.cov) * (2 * np.pi) ** self.ndim
-            )
+            / np.sqrt(np.linalg.det(self.cov) * (2 * np.pi) ** self.ndim)
         )
 
-    def _fit(self, X):
-        cov = np.cov(X.T)
+    def _fit(self, x):
+        cov = np.cov(x.T)
         kmeans = KMeans(self.n_components)
-        kmeans.fit(X)
+        kmeans.fit(x)
         self.mu = kmeans.centers
         self.cov = np.array([cov for _ in range(self.n_components)])
         self.coef = np.ones(self.n_components) / self.n_components
@@ -142,8 +141,8 @@ def _fit(self, X):
             self.coef.ravel())
         )
         while True:
-            stats = self._expectation(X)
-            self._maximization(X, stats)
+            stats = self._expectation(x)
+            self._maximization(x, stats)
             new_params = np.hstack(
                 (self.mu.ravel(),
                 self.cov.ravel(),
@@ -154,26 +153,26 @@ def _fit(self, X):
         else:
             params = new_params
 
-    def _expectation(self, X):
-        resps = self.coef * self._gauss(X)
+    def _expectation(self, x):
+        resps = self.coef * self._gauss(x)
         resps /= resps.sum(axis=-1, keepdims=True)
         return resps
 
-    def _maximization(self, X, resps):
+    def _maximization(self, x, resps):
         Nk = np.sum(resps, axis=0)
-        self.coef = Nk / len(X)
-        self.mu = (X.T @ resps / Nk).T
-        d = X[:, None, :] - self.mu
+        self.coef = Nk / len(x)
+        self.mu = (x.T @ resps / Nk).T
+        d = x[:, None, :] - self.mu
         self.cov = np.einsum(
             'nki,nkj->kij', d, d * resps[:, :, None]) / Nk[:, None, None]
 
-    def joint_proba(self, X):
+    def joint_proba(self, x):
         """
-        calculate joint probability p(X, Z)
+        calculate joint probability p(x, Z)
 
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
 
         Returns
@@ -181,20 +180,20 @@ def joint_proba(self, X):
         joint_prob : (sample_size, n_components) ndarray
             joint probability of input and component
         """
-        return self.coef * self._gauss(X)
+        return self.coef * self._gauss(x)
 
-    def _pdf(self, X):
-        joint_prob = self.coef * self._gauss(X)
+    def _pdf(self, x):
+        joint_prob = self.coef * self._gauss(x)
         return np.sum(joint_prob, axis=-1)
 
-    def classify(self, X):
+    def classify(self, x):
         """
         classify input
         max_z p(z|x, theta)
 
         Parameters
         ----------
-        X : (sample_size, ndim) ndarray
+        x : (sample_size, ndim) ndarray
             input
 
         Returns
@@ -202,16 +201,16 @@ def classify(self, X):
         output : (sample_size,) ndarray
             corresponding cluster index
         """
-        return np.argmax(self.classify_proba(X), axis=1)
+        return np.argmax(self.classify_proba(x), axis=1)
 
-    def classify_proba(self, X):
+    def classify_proba(self, x):
         """
         posterior probability of cluster
         p(z|x,theta)
 
         Parameters
         ----------
-        X : (sample_size, ndim) ndarray
+        x : (sample_size, ndim) ndarray
             input
 
         Returns
@@ -219,4 +218,4 @@ def classify_proba(self, X):
         output : (sample_size, n_components) ndarray
             posterior probability of cluster
         """
-        return self._expectation(X)
+        return self._expectation(x)
diff --git a/prml/rv/rv.py b/prml/rv/rv.py
index 5d794c80c..fb1b1f823 100644
--- a/prml/rv/rv.py
+++ b/prml/rv/rv.py
@@ -2,14 +2,14 @@
 
 
 class RandomVariable(object):
-    """
-    base class for random variables
-    """
+    """Base class for random variables."""
 
     def __init__(self):
+        """Initialize a random variable."""
         self.parameter = {}
 
     def __repr__(self):
+        """Representation of the random variable."""
         string = f"{self.__class__.__name__}(\n"
         for key, value in self.parameter.items():
             string += (" " * 4)
@@ -22,6 +22,7 @@ def __repr__(self):
         return string
 
     def __format__(self, indent="4"):
+        """Format the random variable."""
         indent = int(indent)
         string = f"{self.__class__.__name__}(\n"
         for key, value in self.parameter.items():
@@ -34,77 +35,74 @@ def __format__(self, indent="4"):
         string += (" " * (indent - 4)) + ")"
         return string
 
-    def fit(self, X, **kwargs):
-        """
-        estimate parameter(s) of the distribution
+    def fit(self, x, **kwargs):
+        """Estimate parameter(s) of the distribution.
 
         Parameters
         ----------
-        X : np.ndarray
+        x : np.ndarray
             observed data
         """
-        self._check_input(X)
+        self._check_input(x)
         if hasattr(self, "_fit"):
-            self._fit(X, **kwargs)
+            self._fit(x, **kwargs)
         else:
             raise NotImplementedError
 
-    # def ml(self, X, **kwargs):
+    # def ml(self, x, **kwargs):
    #     """
    #     maximum likelihood estimation of the parameter(s)
    #     of the distribution given data

    #     Parameters
    #     ----------
-    #     X : (sample_size, ndim) np.ndarray
+    #     x : (sample_size, ndim) np.ndarray
    #         observed data
    #     """
-    #     self._check_input(X)
+    #     self._check_input(x)
    #     if hasattr(self, "_ml"):
-    #         self._ml(X, **kwargs)
+    #         self._ml(x, **kwargs)
    #     else:
    #         raise NotImplementedError

-    # def map(self, X, **kwargs):
+    # def map(self, x, **kwargs):
    #     """
    #     maximum a posteriori estimation of the parameter(s)
    #     of the distribution given data

    #     Parameters
    #     ----------
-    #     X : (sample_size, ndim) np.ndarray
+    #     x : (sample_size, ndim) np.ndarray
    #         observed data
    #     """
-    #     self._check_input(X)
+    #     self._check_input(x)
    #     if hasattr(self, "_map"):
-    #         self._map(X, **kwargs)
+    #         self._map(x, **kwargs)
    #     else:
    #         raise NotImplementedError

-    # def bayes(self, X, **kwargs):
+    # def bayes(self, x, **kwargs):
    #     """
    #     bayesian estimation of the parameter(s)
    #     of the distribution given data

    #     Parameters
    #     ----------
-    #     X : (sample_size, ndim) np.ndarray
+    #     x : (sample_size, ndim) np.ndarray
    #         observed data
    #     """
-    #     self._check_input(X)
+    #     self._check_input(x)
    #     if hasattr(self, "_bayes"):
-    #         self._bayes(X, **kwargs)
+    #         self._bayes(x, **kwargs)
    #     else:
    #         raise NotImplementedError

-    def pdf(self, X):
-        """
-        compute probability density function
-        p(X|parameter)
+    def pdf(self, x):
+        """Compute probability density function p(x|parameter).
 
         Parameters
         ----------
-        X : (sample_size, ndim) np.ndarray
+        x : (sample_size, ndim) np.ndarray
             input of the function
 
         Returns
@@ -112,15 +110,14 @@ def pdf(self, X):
         p : (sample_size,) np.ndarray
             value of probability density function for each input
         """
-        self._check_input(X)
+        self._check_input(x)
         if hasattr(self, "_pdf"):
-            return self._pdf(X)
+            return self._pdf(x)
         else:
             raise NotImplementedError
 
     def draw(self, sample_size=1):
-        """
-        draw samples from the distribution
+        """Draw samples from the distribution.
Parameters ---------- @@ -138,5 +135,5 @@ def draw(self, sample_size=1): else: raise NotImplementedError - def _check_input(self, X): - assert isinstance(X, np.ndarray) + def _check_input(self, x): + assert isinstance(x, np.ndarray) diff --git a/prml/rv/students_t.py b/prml/rv/students_t.py index 8e61a48b9..5c2f380a1 100644 --- a/prml/rv/students_t.py +++ b/prml/rv/students_t.py @@ -80,9 +80,9 @@ def shape(self): else: return None - def _fit(self, X, learning_rate=0.01): - self.mu = np.mean(X, axis=0) - self.tau = 1 / np.var(X, axis=0) + def _fit(self, x, learning_rate=0.01): + self.mu = np.mean(x, axis=0) + self.tau = 1 / np.var(x, axis=0) self.dof = 1 params = np.hstack( (self.mu.ravel(), @@ -90,8 +90,8 @@ def _fit(self, X, learning_rate=0.01): self.dof) ) while True: - E_eta, E_lneta = self._expectation(X) - self._maximization(X, E_eta, E_lneta, learning_rate) + E_eta, E_lneta = self._expectation(x) + self._maximization(x, E_eta, E_lneta, learning_rate) new_params = np.hstack( (self.mu.ravel(), self.tau.ravel(), @@ -102,27 +102,27 @@ def _fit(self, X, learning_rate=0.01): else: params = new_params - def _expectation(self, X): - d = X - self.mu + def _expectation(self, x): + d = x - self.mu a = 0.5 * (self.dof + 1) b = 0.5 * (self.dof + self.tau * d ** 2) E_eta = a / b E_lneta = digamma(a) - np.log(b) return E_eta, E_lneta - def _maximization(self, X, E_eta, E_lneta, learning_rate): - self.mu = np.sum(E_eta * X, axis=0) / np.sum(E_eta, axis=0) - d = X - self.mu + def _maximization(self, x, E_eta, E_lneta, learning_rate): + self.mu = np.sum(E_eta * x, axis=0) / np.sum(E_eta, axis=0) + d = x - self.mu self.tau = 1 / np.mean(E_eta * d ** 2, axis=0) - N = len(X) + N = len(x) self.dof += learning_rate * 0.5 * ( N * np.log(0.5 * self.dof) + N - N * digamma(0.5 * self.dof) + np.sum(E_lneta - E_eta, axis=0) ) - def _pdf(self, X): - d = X - self.mu + def _pdf(self, x): + d = x - self.mu D_sq = self.tau * d ** 2 return ( gamma(0.5 * (self.dof + 1)) diff --git a/prml/rv/uniform.py b/prml/rv/uniform.py index ca5173efd..bafed4e58 100644 --- a/prml/rv/uniform.py +++ b/prml/rv/uniform.py @@ -1,17 +1,17 @@ import numpy as np + from prml.rv.rv import RandomVariable class Uniform(RandomVariable): - """ - Uniform distribution + """Random variable that follows a uniform distribution. + p(x|a, b) = prod_i 1 / (b_i - a_i) if a <= x <= b else 0 """ def __init__(self, low, high): - """ - construct uniform distribution + """Initialize a uniform distribution. 
Parameters ---------- @@ -31,6 +31,7 @@ def __init__(self, low, high): @property def low(self): + """Lower bound of the random variable.""" return self.parameter["low"] @low.setter @@ -39,6 +40,7 @@ def low(self, low): @property def high(self): + """Upper bound of the random variable.""" return self.parameter["high"] @high.setter @@ -47,23 +49,27 @@ def high(self, high): @property def ndim(self): + """Rank of the random variable.""" return self.low.ndim @property def size(self): + """Number of elements in the random variable.""" return self.low.size @property def shape(self): + """Shape of the random variable.""" return self.low.shape @property def mean(self): + """Mean value of the random variable.""" return 0.5 * (self.low + self.high) - def _pdf(self, X): - higher = np.logical_and.reduce(X >= self.low, 1) - lower = np.logical_and.reduce(X <= self.high, 1) + def _pdf(self, x): + higher = np.logical_and.reduce(x >= self.low, 1) + lower = np.logical_and.reduce(x <= self.high, 1) return self.value * np.logical_and(higher, lower) def _draw(self, sample_size=1): diff --git a/prml/rv/variational_gaussian_mixture.py b/prml/rv/variational_gaussian_mixture.py index 0dcbc6e38..5a6627f33 100644 --- a/prml/rv/variational_gaussian_mixture.py +++ b/prml/rv/variational_gaussian_mixture.py @@ -1,13 +1,22 @@ import numpy as np from scipy.special import digamma, gamma, logsumexp + from prml.rv.rv import RandomVariable class VariationalGaussianMixture(RandomVariable): - def __init__(self, n_components=1, alpha0=None, m0=None, W0=1., dof0=None, beta0=1.): - """ - construct variational gaussian mixture model + def __init__( + self, + n_components=1, + alpha0=None, + m0=None, + W0=1., + dof0=None, + beta0=1., + ): + """Initialize a variational Gaussian mixture model. + Parameters ---------- n_components : int @@ -34,11 +43,11 @@ def __init__(self, n_components=1, alpha0=None, m0=None, W0=1., dof0=None, beta0 self.dof0 = dof0 self.beta0 = beta0 - def _init_params(self, X): - sample_size, self.ndim = X.shape + def _init_params(self, x): + sample_size, self.ndim = x.shape self.alpha0 = np.ones(self.n_components) * self.alpha0 if self.m0 is None: - self.m0 = np.mean(X, axis=0) + self.m0 = np.mean(x, axis=0) else: self.m0 = np.zeros(self.ndim) + self.m0 self.W0 = np.eye(self.ndim) * self.W0 @@ -49,12 +58,13 @@ def _init_params(self, X): self.alpha = self.alpha0 + self.component_size self.beta = self.beta0 + self.component_size indices = np.random.choice(sample_size, self.n_components, replace=False) - self.mu = X[indices] + self.mu = x[indices] self.W = np.tile(self.W0, (self.n_components, 1, 1)) self.dof = self.dof0 + self.component_size @property def alpha(self): + """Concentration parameter of the posterior Dirichlet distribution.""" return self.parameter["alpha"] @alpha.setter @@ -63,6 +73,7 @@ def alpha(self, alpha): @property def beta(self): + """Precision-scaling parameter of the posterior Gaussian distribution.""" return self.parameter["beta"] @beta.setter @@ -71,6 +82,7 @@ def beta(self, beta): @property def mu(self): + """Mean parameter of the posterior Gaussian distribution.""" return self.parameter["mu"] @mu.setter @@ -79,6 +91,7 @@ def mu(self, mu): @property def W(self): + """Scale matrix parameter of the posterior Wishart distribution.""" return self.parameter["W"] @W.setter @@ -87,6 +100,7 @@ def W(self, W): @property def dof(self): + """Degrees of freedom of the posterior Wishart distribution.""" return self.parameter["dof"] @dof.setter @@ -94,19 +108,20 @@ def dof(self, dof): self.parameter["dof"] = dof def get_params(self): + """Get parameters.""" return self.alpha, self.beta, self.mu, self.W, self.dof - def _fit(self, X, iter_max=100): - self._init_params(X) + def _fit(self, x, 
iter_max=100): + self._init_params(x) for _ in range(iter_max): params = np.hstack([p.flatten() for p in self.get_params()]) - r = self._variational_expectation(X) - self._variational_maximization(X, r) + r = self._variational_expectation(x) + self._variational_maximization(x, r) if np.allclose(params, np.hstack([p.flatten() for p in self.get_params()])): break - def _variational_expectation(self, X): - d = X[:, None, :] - self.mu + def _variational_expectation(self, x): + d = x[:, None, :] - self.mu maha_sq = -0.5 * ( self.ndim / self.beta + self.dof * np.sum( @@ -118,10 +133,10 @@ def _variational_expectation(self, X): r = np.exp(ln_r) return r - def _variational_maximization(self, X, r): + def _variational_maximization(self, x, r): self.component_size = r.sum(axis=0) - Xm = (X.T.dot(r) / self.component_size).T - d = X[:, None, :] - Xm + Xm = (x.T.dot(r) / self.component_size).T + d = x[:, None, :] - Xm S = np.einsum('nki,nkj->kij', d, r[:, :, None] * d) / self.component_size[:, None, None] self.alpha = self.alpha0 + self.component_size self.beta = self.beta0 + self.component_size @@ -133,38 +148,39 @@ def _variational_maximization(self, X, r): + (self.beta0 * self.component_size * np.einsum('ki,kj->kij', d, d).T / (self.beta0 + self.component_size)).T) self.dof = self.dof0 + self.component_size - def classify(self, X): - """ - index of highest posterior of the latent variable + def classify(self, x): + """Index of highest posterior of the latent variable. + Parameters ---------- - X : (sample_size, ndim) ndarray + x : (sample_size, ndim) ndarray input Returns ------- output : (sample_size,) ndarray index of maximum posterior of the latent variable """ - return np.argmax(self._variational_expectation(X), 1) + return np.argmax(self._variational_expectation(x), 1) + + def classify_proba(self, x): + """Compute posterior of the latent variable. - def classify_proba(self, X): - """ - compute posterior of the latent variable Parameters ---------- - X : (sample_size, ndim) ndarray + x : (sample_size, ndim) ndarray input Returns ------- output : (sample_size, n_components) ndarray posterior of the latent variable """ - return self._variational_expectation(X) + return self._variational_expectation(x) - def student_t(self, X): + def student_t(self, x): + """Student's t probability distribution function.""" nu = self.dof + 1 - self.ndim - L = (nu * self.beta * self.W.T / (1 + self.beta)).T - d = X[:, None, :] - self.mu + L = (nu * self.beta * self.W.T / (1 + self.beta)).T # noqa + d = x[:, None, :] - self.mu maha_sq = np.sum(np.einsum('nki,kij->nkj', d, L) * d, axis=-1) return ( gamma(0.5 * (nu + self.ndim)) @@ -172,5 +188,5 @@ def student_t(self, X): * (1 + maha_sq / nu) ** (-0.5 * (nu + self.ndim)) / (gamma(0.5 * nu) * (nu * np.pi) ** (0.5 * self.ndim))) - def _pdf(self, X): - return (self.alpha * self.student_t(X)).sum(axis=-1) / self.alpha.sum() + def _pdf(self, x): + return (self.alpha * self.student_t(x)).sum(axis=-1) / self.alpha.sum() diff --git a/prml/sampling/metropolis.py b/prml/sampling/metropolis.py index 9eebd0878..7b30e7144 100644 --- a/prml/sampling/metropolis.py +++ b/prml/sampling/metropolis.py @@ -1,10 +1,10 @@ import random + import numpy as np def metropolis(func, rv, n, downsample=1): - """ - Metropolis algorithm + """Metropolis algorithm. 
Parameters ---------- diff --git a/prml/sampling/metropolis_hastings.py b/prml/sampling/metropolis_hastings.py index ce20a1303..27c69e48a 100644 --- a/prml/sampling/metropolis_hastings.py +++ b/prml/sampling/metropolis_hastings.py @@ -1,4 +1,5 @@ import random + import numpy as np diff --git a/setup.cfg b/setup.cfg index eef97bbdc..e8ac11e12 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,81 @@ develop = pep8-naming [flake8] +ignore = + ; variable "sum" is shadowing a Python builtin + A001 + ; class attribute is shadowing a Python builtin + A003 + ; import statement "sum" is shadowing a Python builtin + A004 + ; No blank lines allowed after function docstring + D202 + ; missing whitespace around arithmetic operator + E226 + ; missing whitespace after ':' + E231 + ; missing whitespace around parameter equals + E252 + ; expected 2 blank lines, found 1 + E302 + ; too many blank lines (2) + E303 + ; comparison to False should be 'if cond is False:' or 'if not cond:' + E712 + ; the module is shadowing a Python builtin module + A005 + ; missing trailing comma + C812 + ; Missing docstring in public module + D100 + ; Missing docstring in public class + D101 + ; Missing docstring in public method + D102 + ; Missing docstring in public function + D103 + ; Missing docstring in public package + D104 + ; Missing docstring in magic method + D105 + ; Missing docstring in __init__ + D107 + ; One-line docstring should fit on one line with quotes + D200 + ; 1 blank line required between summary line and description + D205 + ; Use r""" if any backslashes in a docstring + D301 + ; First line should end with a period + D400 + ; First line should not be the function's "signature" + D402 + ; First line should be in imperative mood; try rephrasing + D401 + ; First word of the first line should be properly capitalized + D403 + ; missing whitespace after keyword + E275 + ; line too long + E501 + ; Import statements are in the wrong order. + I100 + ; Imported names are in the wrong order. + I101 + ; Missing newline between import groups. 
+ I201 + ; Multi-line container not broken after opening character + JS101 + ; function name should be lowercase + N802 + ; argument name should be lowercase + N803 + ; variable in function should be lowercase + N806 + ; line break before binary operator + W503 + ; invalid escape sequence + W605 per-file-ignores = ; Ignore 'Missing docstring in public module' and 'variable "copyright" is shadowing a python builtin' docs/conf.py:A001,D100 diff --git a/test/test_bayesnet/test_discrete.py b/test/test_bayesnet/test_discrete.py index 7b0e3b850..b55c36b80 100644 --- a/test/test_bayesnet/test_discrete.py +++ b/test/test_bayesnet/test_discrete.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + from prml import bayesnet as bn diff --git a/test/test_nn/test_backward.py b/test/test_nn/test_backward.py index 243afa518..be93c9440 100755 --- a/test/test_nn/test_backward.py +++ b/test/test_nn/test_backward.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_distribution/test_bernoulli.py b/test/test_nn/test_distribution/test_bernoulli.py index 2ec35d097..721050632 100755 --- a/test/test_nn/test_distribution/test_bernoulli.py +++ b/test/test_nn/test_distribution/test_bernoulli.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_distribution/test_gaussian.py b/test/test_nn/test_distribution/test_gaussian.py index dce928dcf..2af50adf5 100755 --- a/test/test_nn/test_distribution/test_gaussian.py +++ b/test/test_nn/test_distribution/test_gaussian.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_image/test_convolve2d.py b/test/test_nn/test_image/test_convolve2d.py index 0ac1ce6dd..e9683f928 100755 --- a/test/test_nn/test_image/test_convolve2d.py +++ b/test/test_nn/test_image/test_convolve2d.py @@ -1,4 +1,5 @@ import unittest + import numpy as np from scipy.ndimage.filters import correlate import prml.nn as nn @@ -29,7 +30,10 @@ def test_convolve2d_backward(self): output.backward(2 * (output.value - 1)) x.value -= x.grad * 0.01 w.value -= w.grad * 0.01 - self.assertTrue(np.allclose(output.value, 1)) + self.assertTrue( + np.allclose(output.value, 1, rtol=0, atol=1e-2), + output.value, + ) self.assertEqual(nn.config.dtype, np.float32) self.assertEqual(x.dtype, nn.config.dtype) self.assertEqual(w.dtype, nn.config.dtype) diff --git a/test/test_nn/test_image/test_deconvolve2d.py b/test/test_nn/test_image/test_deconvolve2d.py index 5b3b6e3dc..24b19f1bf 100755 --- a/test/test_nn/test_image/test_deconvolve2d.py +++ b/test/test_nn/test_image/test_deconvolve2d.py @@ -1,6 +1,8 @@ import unittest + import numpy as np from scipy.ndimage.filters import correlate + import prml.nn as nn @@ -9,8 +11,15 @@ class TestDeconvolve2d(unittest.TestCase): def test_deconvolve2d_forward(self): img = np.random.randn(1, 3, 3, 1).astype(np.float32) kernel = np.random.randn(3, 3, 1, 1).astype(np.float32) - output = nn.deconvolve2d(img, kernel, (1, 1), (0, 0)) - self.assertTrue(np.allclose(output.value[0,1:-1,1:-1,0], correlate(img[0,:,:,0], kernel[::-1,::-1,0,0], mode="constant"))) + out = nn.deconvolve2d(img, kernel, (1, 1), (0, 0)) + + actual = out.value[0, 1:-1, 1:-1, 0] + expect = correlate( + img[0, :, :, 0], + kernel[::-1, ::-1, 0, 0], + mode='constant', + ) + self.assertTrue(np.allclose(actual, expect, 0, 1e-2)) def test_deconvolve2d_backward(self): x = nn.random.normal(0, 1, (1, 3, 3, 1)) @@ -22,7 +31,10 @@ def test_deconvolve2d_backward(self): 
output.backward(2 * (output.value - 1)) x.value -= x.grad * 0.01 w.value -= w.grad * 0.01 - self.assertTrue(np.allclose(output.value, 1), output.value) + self.assertTrue( + np.allclose(output.value, 1, rtol=0, atol=1e-2), + output.value, + ) if __name__ == "__main__": diff --git a/test/test_nn/test_image/test_max_pooling2d.py b/test/test_nn/test_image/test_max_pooling2d.py index 7abce6445..ea9e13f38 100755 --- a/test/test_nn/test_image/test_max_pooling2d.py +++ b/test/test_nn/test_image/test_max_pooling2d.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_loss/test_sigmoid_cross_entropy.py b/test/test_nn/test_loss/test_sigmoid_cross_entropy.py index 1438fe68b..55a34debc 100755 --- a/test/test_nn/test_loss/test_sigmoid_cross_entropy.py +++ b/test/test_nn/test_loss/test_sigmoid_cross_entropy.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn @@ -16,8 +18,8 @@ def test_sigmoid_cross_entropy(self): npg = np.random.randn(10, 3) loss.backward(npg) - self.assertTrue(np.allclose(x.grad, npg * (npy - npt))) - self.assertTrue(np.allclose(t.grad, -npg * npx)) + self.assertTrue(np.allclose(x.grad, npg * (npy - npt), 0, 1e-2)) + self.assertTrue(np.allclose(t.grad, -npg * npx, 0, 1e-2)) if __name__ == "__main__": diff --git a/test/test_nn/test_math/test_add.py b/test/test_nn/test_math/test_add.py index dd2478ebf..3e3d05f4d 100755 --- a/test/test_nn/test_math/test_add.py +++ b/test/test_nn/test_math/test_add.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_math/test_log.py b/test/test_nn/test_math/test_log.py index c5f74c111..65a5a2104 100755 --- a/test/test_nn/test_math/test_log.py +++ b/test/test_nn/test_math/test_log.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_math/test_matmul.py b/test/test_nn/test_math/test_matmul.py index c23b0efd9..333b00534 100755 --- a/test/test_nn/test_math/test_matmul.py +++ b/test/test_nn/test_math/test_matmul.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_math/test_multiply.py b/test/test_nn/test_math/test_multiply.py index 016e45915..4b622bb7e 100755 --- a/test/test_nn/test_math/test_multiply.py +++ b/test/test_nn/test_math/test_multiply.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_math/test_negative.py b/test/test_nn/test_math/test_negative.py index 709fd8d51..9b9e250db 100755 --- a/test/test_nn/test_math/test_negative.py +++ b/test/test_nn/test_math/test_negative.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_nonlinear/test_log_softmax.py b/test/test_nn/test_nonlinear/test_log_softmax.py index f3dbc2e8a..759219e3c 100755 --- a/test/test_nn/test_nonlinear/test_log_softmax.py +++ b/test/test_nn/test_nonlinear/test_log_softmax.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_nonlinear/test_sigmoid.py b/test/test_nn/test_nonlinear/test_sigmoid.py index a4b1fcd80..09f9ed73f 100755 --- a/test/test_nn/test_nonlinear/test_sigmoid.py +++ b/test/test_nn/test_nonlinear/test_sigmoid.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_nonlinear/test_softmax.py b/test/test_nn/test_nonlinear/test_softmax.py index 0fa2a74e1..92a34400a 100755 --- 
a/test/test_nn/test_nonlinear/test_softmax.py +++ b/test/test_nn/test_nonlinear/test_softmax.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn diff --git a/test/test_nn/test_nonlinear/test_tanh.py b/test/test_nn/test_nonlinear/test_tanh.py index 50baa51fc..5586d9d94 100755 --- a/test/test_nn/test_nonlinear/test_tanh.py +++ b/test/test_nn/test_nonlinear/test_tanh.py @@ -1,5 +1,7 @@ import unittest + import numpy as np + import prml.nn as nn
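
Editor's note, appended for context rather than as part of the patch: after this refactor, every public entry point takes a lowercase x ndarray, and RandomVariable.fit validates the input (_check_input asserts it is an np.ndarray) before dispatching to the subclass _fit. Below is a minimal usage sketch of the renamed API, assuming the prml package from this repository is importable; the synthetic data is hypothetical and only for illustration.

import numpy as np

from prml.rv.variational_gaussian_mixture import VariationalGaussianMixture

# Hypothetical data: two well-separated 2-D clusters (illustration only).
x = np.concatenate([
    np.random.normal(-5., 1., size=(100, 2)),
    np.random.normal(5., 1., size=(100, 2)),
])

model = VariationalGaussianMixture(n_components=2)
model.fit(x, iter_max=100)       # RandomVariable.fit forwards kwargs to _fit
labels = model.classify(x)       # (200,) index of the most probable component
proba = model.classify_proba(x)  # (200, 2) posterior responsibilities
density = model.pdf(x)           # (200,) predictive density, a mixture of Student's t

Passing anything other than an np.ndarray (e.g. a plain list) fails the assertion in _check_input, a behaviour the renaming leaves unchanged.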