Skip to contentSkip to frontmatterSkip to Backmatter

OptiMask: Figures Generation

This notebook creates the figures for the paper titled “OptiMask: Efficiently Finding the Largest NaN-Free Submatrix” intended for the SciPy 2025 proceedings. Throughout the cells, several PNG images are generated. At the conclusion, these computed PNGs are losslessly compressed using oxipng.

Cyril Joly

# ! pip install optimask pyoxipng

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from optimask import OptiMask
from optimask.utils import generate_mar, plot
from oxipng import optimize
[p.unlink() for p in Path('.').glob('*.png')]
DPI = 200

1The problem at hand

Illustrating the trade-off between removing cells or columns in a toy example:

x = np.zeros((12, 5))
x[7, [1, 3]] = np.nan

plt.figure()
plt.subplot(1, 3, 1)
plot(x, show=False, title="original data")
plt.subplot(1, 3, 2)
plot(x, rows_to_remove=[7], show=False, title="remove row")
plt.subplot(1, 3, 3)
plot(x, cols_to_remove=[1, 3], show=False, title="remove columns")
plt.savefig('at_hand_two.png', bbox_inches='tight', dpi=DPI)
plt.show()

2Algorithm Steps

def heights(x, axis=0):
    return (x.shape[axis] - np.argmax(np.flip(x, axis=axis), axis=axis)).astype(np.uint32)


def to_plot(x):
    xp = x.astype(float)
    xp[xp > 0] = np.nan
    return xp

Generation of an example matrix with missing values, with missing at random values plus a contiguous set of 7 missing values.

m, n = 40, 30
x = generate_mar(m=m, n=n, ratio=0.04, rng=1)
x[12:19, 10] = np.nan

plot(x, show=False)
plt.savefig('algo_data.png', bbox_inches='tight', dpi=DPI)
plt.show()


nan_rows, nan_cols = np.isnan(x).nonzero()
nan_rows, nan_cols = np.unique(nan_rows), np.unique(nan_cols)
xp = np.isnan(x[nan_rows][:, nan_cols])
hx, hy = heights(xp, axis=0), heights(xp, axis=1)
plot(to_plot(xp), xticks=hx, yticks=hy, show=False)
plt.savefig('algo_0.png', bbox_inches='tight', dpi=DPI)
plt.show()
hx, hy = heights(xp, axis=0).astype(np.uint32), heights(xp, axis=1).astype(np.uint32)
k = 0
plt.figure(figsize=(12, 5))
while not (OptiMask.is_decreasing(hx) and OptiMask.is_decreasing(hy)):
    axis = k % 2
    p_step = np.argsort(-heights(xp, axis=axis))
    if axis == 0:
        xp = xp[:, p_step]
    if axis == 1:
        xp = xp[p_step]
    hx, hy = heights(xp, axis=0).astype(np.uint32), heights(xp, axis=1).astype(
        np.uint32
    )
    plt.subplot(1, 3, k+1)
    plot(to_plot(xp), xticks=hx, yticks=hy, title=f"Iteration {k+1}", show=False)
    k += 1
plt.tight_layout()
plt.savefig('algo_iterations.png', bbox_inches='tight', dpi=DPI)
plt.show()
def rectangle(xmin, ymin, xmax, ymax, c='k', lw=0.4):
    x = [xmin, xmax, xmax, xmin, xmin]
    y = [ymin, ymin, ymax, ymax, ymin]
    plt.plot(x, y, c=c, linestyle='--', lw=lw)
opt_area = 0
plot(np.zeros_like(x), show=False)
plot(to_plot(xp), xticks=hx, yticks=hy, show=False)
for i0 in range(len(hx)):
    if (i0 == 0) or (i0 > 0 and (hx[i0] < hx[i0-1])):
        rectangle(i0, hx[i0], n, m)
        if (n-i0)*(m-hx[i0]) > opt_area:
            opt_area, opt_index = (n-i0)*(m-hx[i0]), i0
rectangle(len(hx), 0, n, m)
rectangle(opt_index, hx[opt_index], n, m, c='r', lw=1)
plt.savefig('algo_result_permuted_space.png', bbox_inches='tight', dpi=DPI)
plt.show()
rows, cols = OptiMask(n_tries=10000).solve(x)
plot(x, rows, cols, show=False)
plt.savefig('algo_result.png', bbox_inches='tight', dpi=DPI)
plt.show()
[optimize(path) for path in Path('.').glob('*.png')]
License

Copyright © 2025 Joly. This is an open-access article distributed under the terms of the Creative Commons Attribution 4.0 International license, which enables reusers to distribute, remix, adapt, and build upon the material in any medium or format, so long as attribution is given to the creator.

Abbreviations
NaN
Not a Number