import pathlib
import cv2
import numpy as np
import numpy.typing as npt
import PIL.Image
import torch
def load_image(
    path: pathlib.Path, flags: int = cv2.IMREAD_COLOR, as_rgb: bool = True
) -> npt.NDArray:
    """
    Load the given image using OpenCV.

    ``flags`` corresponds to the ``cv2.IMREAD_`` flags from OpenCV. Please see the full
    list of options
    `here <https://docs.opencv.org/3.4/d8/d6a/group__imgcodecs__flags.html>`__. If no
    value is passed, the image is loaded using ``cv2.IMREAD_COLOR``, which loads it as
    an 8-bit BGR image.

    ``as_rgb`` can be used to automatically convert the image from OpenCV's default BGR
    to RGB using the following logic:

    * If the image is grayscale, it is returned as is.
    * If the image is a 3-channel BGR image, it is converted to RGB.
    * If the image is a 4-channel BGRA image, it is converted to RGBA.

    If the image has any other number of channels, a ``RuntimeError`` is raised.

    Args:
        path: the path to the image to load.
        flags: the OpenCV flags to use when loading.
        as_rgb: if true, the image will be converted from BGR/BGRA to RGB/RGBA,
            otherwise the image is returned as is.

    Returns:
        The loaded image.
    """
    img = cv2.imread(str(path), flags=flags)
    if not as_rgb:
        return img
    if len(img.shape) == 2:
        return img

    c = img.shape[2]
    if c == 3:
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if c == 4:
        return cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)

    raise RuntimeError(
        f"error: expected a 3 or 4 channel image but received {c} channels"
    )

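# Illustrative usage sketch for load_image (not part of the original module). The
# helper name and the print statements are only for demonstration; ``path`` is any
# image file you want to load.
def _example_load_image(path: pathlib.Path) -> None:
    # Default call: 8-bit colour image, converted from OpenCV's BGR to RGB.
    img = load_image(path)
    print(img.shape, img.dtype)  # e.g. (H, W, 3) uint8

    # Grayscale load; as_rgb=False skips any channel conversion.
    gray = load_image(path, flags=cv2.IMREAD_GRAYSCALE, as_rgb=False)
    print(gray.shape)  # (H, W)
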
def load_image_pil(
    path: pathlib.Path, out_fmt: str = "", as_numpy: bool = True
) -> npt.NDArray | PIL.Image.Image:
    """
    Load the given image using PIL.

    ``out_fmt`` is a format string that can be passed in to ``PIL.Image.convert``.
    Please see
    `here <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert>`__
    for the list of accepted strings. If no string is passed, the image will be
    converted to RGB format.

    By default, the output is a NumPy array. If you need a PIL image instead, set
    ``as_numpy`` to false.

    Args:
        path: the path to the image to load.
        out_fmt: the format to convert the loaded image to. Defaults to empty.
        as_numpy: if true, the loaded image will be returned as a NumPy array,
            otherwise it is returned as a PIL image. Defaults to true.

    Returns:
        The loaded image.
    """
    with path.open(mode="rb") as infile:
        img = PIL.Image.open(infile)
        out = img.convert(out_fmt) if out_fmt != "" else img.convert("RGB")
        if as_numpy:
            return np.array(out)
        return out

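# Illustrative usage sketch for load_image_pil (not part of the original module);
# the helper name and prints are only for demonstration.
def _example_load_image_pil(path: pathlib.Path) -> None:
    # Default: RGB image returned as a NumPy array.
    arr = load_image_pil(path)
    print(arr.shape)  # (H, W, 3)

    # Grayscale ("L" mode) image, kept as a PIL image.
    pil_img = load_image_pil(path, out_fmt="L", as_numpy=False)
    print(pil_img.mode, pil_img.size)
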
def tensor_to_numpy(tens: torch.Tensor, as_float: bool = False) -> npt.NDArray:
    """
    Convert the given tensor to a NumPy array.

    Args:
        tens: the tensor to convert, in the range :math:`[0, 1]`.
        as_float: whether to leave the output as float in :math:`[0, 1]` or convert it
            to ``uint8`` in :math:`[0, 255]`.

    Returns:
        The converted array.
    """
    as_np = tens.squeeze().float().clamp_(0, 1).cpu().detach().numpy()
    if as_np.ndim == 3:
        # Re-order from CHW (PyTorch) to HWC (NumPy/OpenCV).
        as_np = np.transpose(as_np, (1, 2, 0))
    if not as_float:
        as_np = np.uint8((as_np * 255.0).round())  # type: ignore[assignment]
    return as_np

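# Illustrative usage sketch for tensor_to_numpy (not part of the original module);
# the helper name and the random tensor are only for demonstration.
def _example_tensor_to_numpy() -> None:
    # A fake 3-channel image tensor in CHW order with values in [0, 1].
    tens = torch.rand(3, 64, 64)
    arr = tensor_to_numpy(tens)
    print(arr.shape, arr.dtype)  # (64, 64, 3) uint8

    arr_f = tensor_to_numpy(tens, as_float=True)
    print(arr_f.dtype)  # float32, still in [0, 1]
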
def show_tensor(tens: torch.Tensor, title: str = "debug window") -> None:
    """
    Display the image held by the tensor. Useful for debugging purposes.

    Args:
        tens: the image tensor to display, in the range :math:`[0, 1]`.
        title: the title of the displayed window. Defaults to "debug window".
    """
    img = tensor_to_numpy(tens)
    # OpenCV expects BGR ordering for display.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imshow(title, img)
    cv2.waitKey(0)

def show_tensors(tens: torch.Tensor) -> None:
    """
    Show batches of tensors.

    Args:
        tens: the batch of tensors to display, in the range :math:`[0, 1]`.
    """
    if len(tens.shape) == 3:
        show_tensor(tens)
        return

    for b in range(tens.shape[0]):
        show_tensor(tens[b], title=f"tensor[{b}]")

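# Illustrative usage sketch for show_tensor/show_tensors (not part of the original
# module); opens OpenCV windows, so it requires a display.
def _example_show_tensors() -> None:
    # A fake batch of two RGB images in [0, 1]; each one opens its own window.
    show_tensors(torch.rand(2, 3, 64, 64))
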
def convert_to_hwc(img: npt.NDArray, input_order: str = "HWC") -> npt.NDArray:
    """
    Change the order of the input image channels so the result is in (h, w, c) order.

    If the input image is a single-channel image, then the return is (h, w, 1).

    Args:
        img: input image.
        input_order: the order of the channels of the input image. Must be one of
            'HWC' or 'CHW'.

    Returns:
        The shuffled image.
    """
    assert input_order in ("HWC", "CHW")

    if len(img.shape) == 2:
        img = img[..., None]
    if input_order == "CHW":
        img = img.transpose(1, 2, 0)
    return img

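# Illustrative usage sketch for convert_to_hwc (not part of the original module);
# the helper name and the zero arrays are only for demonstration.
def _example_convert_to_hwc() -> None:
    chw = np.zeros((3, 32, 48), dtype=np.float32)
    print(convert_to_hwc(chw, input_order="CHW").shape)  # (32, 48, 3)

    gray = np.zeros((32, 48), dtype=np.float32)
    print(convert_to_hwc(gray).shape)  # (32, 48, 1)
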
def to_y_channel(img: npt.NDArray) -> npt.NDArray:
    """
    Return the Y (luma) channel of a BGR image.

    Args:
        img: input image in BGR format. Must be in the range :math:`[0, 255]`.

    Returns:
        The luma channel of the input image, in the range :math:`[0, 255]`.
    """
    img = img.astype(np.float32) / 255.0
    if img.ndim == 3 and img.shape[2] == 3:
        img = bgr2ycbcr(img, only_y=True)
        img = img[..., None]
    return img * 255.0

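# Illustrative usage sketch for to_y_channel (not part of the original module);
# the helper name and the random image are only for demonstration.
def _example_to_y_channel() -> None:
    # A fake 8-bit BGR image in the [0, 255] range.
    bgr = np.random.randint(0, 256, size=(32, 48, 3), dtype=np.uint8)
    y = to_y_channel(bgr)
    print(y.shape)  # (32, 48, 1), values in [0, 255]
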
def bgr2ycbcr(img: npt.NDArray, only_y: bool = False) -> npt.NDArray:
    """
    Convert the given NumPy image array from BGR to YCbCr.

    If only the Y channel is required, set ``only_y`` to true. The input may be a
    ``uint8`` image in :math:`[0, 255]` or a float image in :math:`[0, 1]`; the output
    has the same dtype and range.

    Args:
        img: the BGR image to convert.
        only_y: if true, only the luma (Y) channel will be returned.

    Returns:
        The converted image.
    """
    intype = img.dtype
    img = img.astype(np.float32)
    if intype != np.uint8:
        img *= 255

    # ITU-R BT.601 YCbCr coefficients (same values as MATLAB's rgb2ycbcr), with the
    # rows ordered for BGR input.
    if only_y:
        rlt = np.dot(img, [24.966, 128.553, 65.481]) / 255.0 + 16.0
    else:
        rlt = np.matmul(
            img,
            [
                [24.966, 112.0, -18.214],
                [128.553, -74.203, -93.786],
                [65.481, -37.797, 112.0],
            ],
        ) / 255.0 + [16, 128, 128]

    if intype == np.uint8:
        rlt = rlt.round()
    else:
        rlt /= 255.0
    return rlt.astype(intype)

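# Illustrative usage sketch for bgr2ycbcr (not part of the original module);
# the helper name and the random image are only for demonstration.
def _example_bgr2ycbcr() -> None:
    bgr = np.random.randint(0, 256, size=(32, 48, 3), dtype=np.uint8)
    ycbcr = bgr2ycbcr(bgr)
    y = bgr2ycbcr(bgr, only_y=True)
    print(ycbcr.shape, ycbcr.dtype)  # (32, 48, 3) uint8
    print(y.shape)                   # (32, 48)
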
def rgb2ycbcr_torch(img: torch.Tensor, only_y: bool = False) -> torch.Tensor:
    """
    Convert the given torch tensor image array from RGB to YCbCr.

    If only the Y channel is required, set ``only_y`` to true.

    Args:
        img: the batch of RGB images to convert, with shape :math:`(N, 3, H, W)` and
            values in the range :math:`[0, 1]`.
        only_y: if true, only the luma (Y) channel will be returned.

    Returns:
        The converted image.
    """
    if only_y:
        weight = torch.tensor([[65.481], [128.553], [24.966]]).to(img)
        out_img = (
            torch.matmul(img.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + 16.0
        )
    else:
        weight = torch.tensor(
            [
                [65.481, -37.797, 112.0],
                [128.553, -74.203, -93.786],
                [24.966, 112.0, -18.214],
            ]
        ).to(img)
        bias = torch.tensor([16, 128, 128]).view(1, 3, 1, 1).to(img)
        out_img = (
            torch.matmul(img.permute(0, 2, 3, 1), weight).permute(0, 3, 1, 2) + bias
        )

    out_img = out_img / 255.0
    return out_img

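# Illustrative usage sketch for rgb2ycbcr_torch (not part of the original module);
# the helper name and the random batch are only for demonstration.
def _example_rgb2ycbcr_torch() -> None:
    # A fake batch of 4 RGB images in NCHW order with values in [0, 1].
    batch = torch.rand(4, 3, 16, 16)
    ycbcr = rgb2ycbcr_torch(batch)
    y = rgb2ycbcr_torch(batch, only_y=True)
    print(ycbcr.shape)  # torch.Size([4, 3, 16, 16])
    print(y.shape)      # torch.Size([4, 1, 16, 16])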