# Source code for crappy.tool.gpucorrel

# coding:utf-8

import warnings
from math import ceil
import numpy as np
from pkg_resources import resource_filename
from typing import Any
from .._global import OptionalModule

try:
  import cv2
except (ModuleNotFoundError, ImportError):
  cv2 = OptionalModule("opencv-python")

from .fields import get_field
from .._global import OptionalModule
try:
  import pycuda.driver as cuda
  from pycuda.compiler import SourceModule
  import pycuda.gpuarray as gpuarray
  from pycuda.reduction import ReductionKernel
except ImportError:
  cuda = OptionalModule("pycuda",
                        "PyCUDA and CUDA are necessary to use GPUCorrel")
  SourceModule = OptionalModule("pycuda",
                                "PyCUDA and CUDA are necessary to use "
                                "GPUCorrel")
  gpuarray = OptionalModule("pycuda",
                            "PyCUDA and CUDA are necessary to use GPUCorrel")
  ReductionKernel = OptionalModule("pycuda",
                                   "PyCUDA and CUDA are necessary to use "
                                   "GPUCorrel")


# Module-wide handle on the CUDA context. It is assigned when a GPUCorrel
# object is instantiated (either from its 'context' kwarg or from
# make_default_context()) and popped by GPUCorrel.clean().
context = None


def interp_nearest(ary: np.ndarray, ny: int, nx: int) -> np.ndarray:
  """Resamples a 2D array to the shape `(ny, nx)` by nearest-pixel lookup.

  Used to interpolate the mask for each stage.

  Args:
    ary: The 2D array to resample.
    ny: The number of rows of the output.
    nx: The number of columns of the output.

  Returns:
    `ary` itself (not a copy, dtype untouched) if it already has the shape
    `(ny, nx)`, otherwise a new `float32` array of shape `(ny, nx)`.
  """

  if ary.shape == (ny, nx):
    return ary
  y, x = ary.shape
  # Source index of each output row / column: scale the output index, then
  # round half up. Computed once per row and per column instead of once per
  # pixel.
  rows = (np.arange(ny) * (y / ny) + .5).astype(np.intp)
  cols = (np.arange(nx) * (x / nx) + .5).astype(np.intp)
  # When upsampling by a factor of 2 or more, the last indices round up to
  # the size of the array: clamp them so that the lookup stays in bounds.
  np.minimum(rows, y - 1, out=rows)
  np.minimum(cols, x - 1, out=cols)
  return ary[np.ix_(rows, cols)].astype(np.float32)
# =======================================================================#
# =                                                                     =#
# =                        Class CorrelStage:                           =#
# =                                                                     =#
# =======================================================================#
class CorrelStage:
  """Run a correlation routine on an image, at a given resolution.

  Note:
    Multiple instances of this class are used for the pyramidal correlation
    in `Correl()`.

    Can but is not meant to be used as is.
  """

  num = 0  # To count the instances so they get a unique number (self.num)

  def __init__(self, img_size: tuple, **kwargs) -> None:
    """Allocates the device buffers and compiles the kernels of the stage.

    Args:
      img_size: The `(height, width)` of the images handled by this stage.
      **kwargs: The recognized keys are:

        - ``verbose``: Verbosity level of :meth:`debug` (default `0`).
        - ``iterations``: Maximum number of iterations of :meth:`get_disp`
          (default `5`).
        - ``show_diff``: If :obj:`True`, opens a :mod:`cv2` window in which
          the residual is displayed after each call to :meth:`get_disp`.
        - ``mul``: Factor applied to the research direction before it is
          added to the parameters (default `3`).
        - ``Nfields``: Number of fields. If not given, it is deduced from
          ``fields``.
        - ``fields``: `(fields_x, fields_y)`, see :meth:`set_fields`.
        - ``kernel_file``: Path to the file containing the kernels. If not
          given, ``tool/kernels.cu`` from the crappy package is used.
        - ``img``: The original image, see :meth:`set_orig`.
        - ``mask``: The mask, see :meth:`set_mask`.

    Raises:
      ValueError: If neither ``Nfields`` nor ``fields`` is given.
    """

    self.num = CorrelStage.num
    CorrelStage.num += 1
    self.verbose = kwargs.get("verbose", 0)
    self.debug(2, "Initializing with resolution", img_size)
    self.h, self.w = img_size
    self._ready = False
    self.nbIter = kwargs.get("iterations", 5)
    self.showDiff = kwargs.get("show_diff", False)
    if self.showDiff:
      # cv2 is the module-level import (or its OptionalModule placeholder)
      cv2.namedWindow("Residual", cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
    self.mul = kwargs.get("mul", 3)
    # These two store the values of the last resampled array
    # It is meant to allocate output array only once (see resample_d)
    self.rX, self.rY = -1, -1
    # self.loop will be incremented every time get_disp is called
    # It will be used to measure performance and output some info
    self.loop = 0

    # Allocating stuff #

    # Grid and block for kernels called with the size of the image #
    # All the images and arrays in the kernels will be in order (x,y)
    self.grid = (int(ceil(self.w / 32)),
                 int(ceil(self.h / 32)))
    self.block = (int(ceil(self.w / self.grid[0])),
                  int(ceil(self.h / self.grid[1])), 1)
    self.debug(3, "Default grid:", self.grid, "block", self.block)

    # We need the number of fields to allocate the G tables #
    self.Nfields = kwargs.get("Nfields")
    if self.Nfields is None:
      fields = kwargs.get("fields")
      if fields is None:
        self.debug(0, "Error ! Either Nfields or fields must be provided")
        raise ValueError("Either Nfields or fields must be provided")
      self.Nfields = len(fields[0])

    # Allocating everything we need #
    # devG stores the G arrays (to compute the research direction)
    self.devG = [gpuarray.empty(img_size, np.float32)
                 for _ in range(self.Nfields)]
    # devFieldsX/Y store the fields value along X and Y. They are single
    # arrays of shape (Nfields, h, w): get_disp reads their .gpudata, _make_g
    # indexes them by field, and set_fields writes into them with .set()
    self.devFieldsX = gpuarray.empty((self.Nfields, self.h, self.w),
                                     np.float32)
    self.devFieldsY = gpuarray.empty((self.Nfields, self.h, self.w),
                                     np.float32)
    # H Stores the Hessian matrix
    self.H = np.zeros((self.Nfields, self.Nfields), np.float32)
    # And devHi stores its invert
    self.devHi = gpuarray.empty((self.Nfields, self.Nfields), np.float32)
    # devOut is written with the difference of the images
    self.devOut = gpuarray.empty((self.h, self.w), np.float32)
    # devX stores the value of the parameters (what is actually computed)
    self.devX = gpuarray.empty(self.Nfields, np.float32)
    # to store the research direction
    self.devVec = gpuarray.empty(self.Nfields, np.float32)
    # To store the original image on the device
    self.devOrig = gpuarray.empty(img_size, np.float32)
    # To store the gradient along X of the original image on the device
    self.devGradX = gpuarray.empty(img_size, np.float32)
    # And along Y
    self.devGradY = gpuarray.empty(img_size, np.float32)

    # Locating the kernel file #
    kernel_file = kwargs.get("kernel_file")
    if kernel_file is None:
      self.debug(2, "Kernel file not specified")
      kernel_file = resource_filename('crappy', 'tool/kernels.cu')
    # Reading kernels and compiling module #
    with open(kernel_file, "r") as f:
      self.debug(3, "Sourcing module")
      # The kernel source is a %-template filled with the stage dimensions
      self.mod = SourceModule(f.read() % (self.w, self.h, self.Nfields))
    # Assigning functions to the kernels #
    # These kernels are defined in tool/kernels.cu
    self._resampleOrigKrnl = self.mod.get_function('resampleO')
    self._resampleKrnl = self.mod.get_function('resample')
    self._gradientKrnl = self.mod.get_function('gradient')
    self._makeGKrnl = self.mod.get_function('makeG')
    self._makeDiff = self.mod.get_function('makeDiff')
    self._dotKrnl = self.mod.get_function('myDot')
    self._addKrnl = self.mod.get_function('kadd')
    # These ones use pyCuda reduction module to generate efficient kernels
    self._mulRedKrnl = ReductionKernel(np.float32, neutral="0",
                                       reduce_expr="a+b",
                                       map_expr="x[i]*y[i]",
                                       arguments="float *x, float *y")
    self._leastSquare = ReductionKernel(np.float32, neutral="0",
                                        reduce_expr="a+b",
                                        map_expr="x[i]*x[i]",
                                        arguments="float *x")
    # We could have used use mulRedKrnl(x,x), but this is probably faster ?

    # Getting texture references #
    self.tex = self.mod.get_texref('tex')
    self.tex_d = self.mod.get_texref('tex_d')
    self.texMask = self.mod.get_texref('texMask')
    # Setting proper flags #
    # All textures use normalized coordinates except for the mask
    for t in [self.tex, self.tex_d]:
      t.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
    for t in [self.tex, self.tex_d, self.texMask]:
      t.set_filter_mode(cuda.filter_mode.LINEAR)
      t.set_address_mode(0, cuda.address_mode.BORDER)
      t.set_address_mode(1, cuda.address_mode.BORDER)

    # Preparing kernels for less overhead when called #
    self._resampleOrigKrnl.prepare("Pii", texrefs=[self.tex])
    self._resampleKrnl.prepare("Pii", texrefs=[self.tex_d])
    self._gradientKrnl.prepare("PP", texrefs=[self.tex])
    self._makeDiff.prepare("PPPP",
                           texrefs=[self.tex, self.tex_d, self.texMask])
    self._addKrnl.prepare("PfP")
    # Reading original image if provided #
    if kwargs.get("img") is not None:
      self.set_orig(kwargs.get("img"))
    # Reading fields if provided #
    if kwargs.get("fields") is not None:
      self.set_fields(*kwargs.get("fields"))
    # Reading mask if provided #
    if kwargs.get("mask") is not None:
      self.set_mask(kwargs.get("mask"))

  def debug(self, n: int, *s: Any) -> None:
    """To print debug messages.

    Note:
      First argument is the level of the message.

      The others arguments will be displayed only if the `self.verbose` var
      is superior or equal.

      Also, flag and indentation reflect respectively the origin and the
      level of the message.
    """

    if n <= self.verbose:
      # Continuation lines are indented so that they line up under the flag
      pad = "\n" + (10 + n) * " "
      s2 = tuple(str(item).replace("\n", pad) for item in s)
      print(" " * (n - 1) + "[Stage " + str(self.num) + "]", *s2)

  def set_orig(self, img):
    """To set the original image from a given CPU or GPU array.

    Warning:
      If it is a GPU array, it will NOT be copied.

    Note:
      The most efficient method is to write directly over `self.devOrig` with
      some kernel and then run :meth:`update_orig`.

    Raises:
      ValueError: If `img` is neither a numpy array nor a GPUArray.
    """

    assert img.shape == (self.h, self.w), \
        "Got a {} image in a {} correlation routine!".format(
          img.shape, (self.h, self.w))
    if isinstance(img, np.ndarray):
      self.debug(3, "Setting original image from ndarray")
      self.devOrig.set(img)
    elif isinstance(img, gpuarray.GPUArray):
      self.debug(3, "Setting original image from GPUArray")
      self.devOrig = img
    else:
      self.debug(0, "Error ! Unknown type of data given to set_orig()")
      raise ValueError
    self.update_orig()

  def update_orig(self) -> None:
    """Needs to be called after `self.devOrig` has been written directly.

    Binds the original image to its texture, computes its gradients and
    marks the stage as not ready, so that :meth:`prepare` runs again.
    """

    self.debug(3, "Updating original image")
    self.array = cuda.gpuarray_to_array(self.devOrig, 'C')
    # 'C' order implies tex2D(x,y) will fetch matrix(y,x):
    # this is where x and y are inverted to comply with the kernels order
    self.tex.set_array(self.array)
    self._compute_gradients()
    self._ready = False

  def _compute_gradients(self) -> None:
    """Wrapper to call the gradient kernel."""

    self._gradientKrnl.prepared_call(self.grid, self.block,
                                     self.devGradX.gpudata,
                                     self.devGradY.gpudata)

  @staticmethod
  def _default_mask(h: int, w: int) -> np.ndarray:
    """Returns a `(h, w)` mask of ones with a null border of 5% per side.

    The border is `h // 20` rows and `w // 20` columns wide, and is the same
    on opposite sides of the image.
    """

    border_y, border_x = h // 20, w // 20
    mask = np.zeros((h, w), np.float32)
    mask[border_y:h - border_y, border_x:w - border_x] = 1
    return mask

  def prepare(self) -> None:
    """Computes all necessary tables to perform correlation.

    Note:
      This method must be called everytime the original image or fields are
      set.

      If not done by the user, it will be done automatically when needed.
    """

    if not hasattr(self, 'maskArray'):
      self.debug(2, "No mask set when preparing, using a basic one, "
                    "with a border of 5% the dimension")
      self.set_mask(self._default_mask(self.h, self.w))
    if self._ready:
      self.debug(1, "Tried to prepare when unnecessary, doing nothing...")
    elif not hasattr(self, 'array'):
      self.debug(1, "Tried to prepare but original texture is not set !")
    elif not hasattr(self, 'fields'):
      self.debug(1, "Tried to prepare but fields are not set !")
    else:
      self._make_g()
      self._make_h()
      self._ready = True
      self.debug(3, "Ready!")

  def _make_g(self) -> None:
    """Calls the makeG kernel once per field to fill the `devG` arrays."""

    for i in range(self.Nfields):
      # Change to prepared call ?
      self._makeGKrnl(self.devG[i].gpudata, self.devGradX.gpudata,
                      self.devGradY.gpudata,
                      self.devFieldsX[i], self.devFieldsY[i],
                      block=self.block, grid=self.grid)

  def _make_h(self) -> None:
    """Builds the Hessian from the G arrays and uploads its inverse."""

    for i in range(self.Nfields):
      # The matrix is symmetric: only one half is computed, then mirrored
      for j in range(i + 1):
        self.H[i, j] = self._mulRedKrnl(self.devG[i], self.devG[j]).get()
        if i != j:
          self.H[j, i] = self.H[i, j]
    self.debug(3, "Hessian:\n", self.H)
    self.devHi.set(np.linalg.inv(self.H))  # *1e-3)
    # Looks stupid but prevents a useless devHi copy if nothing is printed
    if self.verbose >= 3:
      self.debug(3, "Inverted Hessian:\n", self.devHi.get())

  def resample_orig(self, new_y: int, new_x: int, dev_out) -> None:
    """To resample the original image.

    Note:
      Reads `orig.texture` and writes the interpolated `newX*newY` image to
      the `devOut` array.
    """

    grid = (int(ceil(new_x / 32)), int(ceil(new_y / 32)))
    block = (int(ceil(new_x / grid[0])), int(ceil(new_y / grid[1])), 1)
    self.debug(3, "Resampling Orig texture, grid:", grid, "block:", block)
    # NOTE(review): the kernel is launched with the grid and block of this
    # stage, not with the ones computed (and logged) above for the output
    # size. Kept as is since kernels.cu is not visible here - confirm whether
    # the smaller launch configuration was the intended one.
    self._resampleOrigKrnl.prepared_call(self.grid, self.block,
                                         dev_out.gpudata,
                                         np.int32(new_x), np.int32(new_y))
    self.debug(3, "Resampled original texture to", dev_out.shape)

  def resample_d(self, new_y: int, new_x: int):
    """Resamples `tex_d` and returns it in a `gpuarray`.

    The output array, grid and block are only re-created when the requested
    size differs from the one of the previous call.
    """

    if (self.rX, self.rY) != (np.int32(new_x), np.int32(new_y)):
      self.rGrid = (int(ceil(new_x / 32)), int(ceil(new_y / 32)))
      self.rBlock = (int(ceil(new_x / self.rGrid[0])),
                     int(ceil(new_y / self.rGrid[1])), 1)
      self.rX, self.rY = np.int32(new_x), np.int32(new_y)
      self.devROut = gpuarray.empty((new_y, new_x), np.float32)
    self.debug(3, "Resampling img_d texture to", (new_y, new_x),
               " grid:", self.rGrid, "block:", self.rBlock)
    self._resampleKrnl.prepared_call(self.rGrid, self.rBlock,
                                     self.devROut.gpudata,
                                     self.rX, self.rY)
    return self.devROut

  def set_fields(self, fields_x, fields_y) -> None:
    """Method to give the fields to identify with the routine.

    Note:
      This is necessary only once and can be done multiple times, but the
      routine have to be initialized with :meth:`prepare`, causing a slight
      overhead.

      Takes a :obj:`tuple` or :obj:`list` of 2 `(gpu)arrays[Nfields,x,y]` (one
      for displacement along `x` and one along `y`).

    Warning:
      GPU arrays are NOT copied, the stage keeps a reference to them.

    Raises:
      ValueError: If `fields_x` is neither a numpy array nor a GPUArray.
    """

    self.debug(2, "Setting fields")
    if isinstance(fields_x, np.ndarray):
      # GPUArray.set() needs contiguous data of the dtype of the buffer
      self.devFieldsX.set(np.ascontiguousarray(fields_x, dtype=np.float32))
      self.devFieldsY.set(np.ascontiguousarray(fields_y, dtype=np.float32))
    elif isinstance(fields_x, gpuarray.GPUArray):
      self.devFieldsX = fields_x
      self.devFieldsY = fields_y
    else:
      self.debug(0, "Error ! Unknown type of data given to set_fields()")
      raise ValueError
    self.fields = True
    # G and the Hessian depend on the fields: prepare() has to run again
    self._ready = False

  def set_image(self, img_d) -> None:
    """Set the image to compare with the original.

    Note:
      Calling this method is not necessary: you can do `.get_disp(image)`.
      This will automatically call this method first.

    Also resets the parameters (`devX`) to `0`.

    Raises:
      ValueError: If `img_d` is neither a numpy array nor a GPUArray.
    """

    assert img_d.shape == (self.h, self.w), \
        "Got a {} image in a {} correlation routine!".format(
          img_d.shape, (self.h, self.w))
    if isinstance(img_d, np.ndarray):
      self.debug(3, "Creating texture from numpy array")
      self.array_d = cuda.matrix_to_array(img_d, "C")
    elif isinstance(img_d, gpuarray.GPUArray):
      self.debug(3, "Creating texture from gpuarray")
      self.array_d = cuda.gpuarray_to_array(img_d, "C")
    else:
      self.debug(0, "Error ! Unknown type of data given to .set_image()")
      raise ValueError
    self.tex_d.set_array(self.array_d)
    self.devX.set(np.zeros(self.Nfields, dtype=np.float32))

  def set_mask(self, mask) -> None:
    """Binds the given mask to the mask texture.

    The mask must have the shape of the stage. It is converted to `float32`
    if needed.

    Raises:
      ValueError: If `mask` is neither a numpy array nor a GPUArray.
    """

    self.debug(3, "Setting the mask")
    assert mask.shape == (self.h, self.w), \
        "Got a {} mask in a {} routine.".format(mask.shape, (self.h, self.w))
    if not mask.dtype == np.float32:
      self.debug(2, "Converting the mask to float32")
      mask = mask.astype(np.float32)
    if isinstance(mask, np.ndarray):
      self.maskArray = cuda.matrix_to_array(mask, 'C')
    elif isinstance(mask, gpuarray.GPUArray):
      self.maskArray = cuda.gpuarray_to_array(mask, 'C')
    else:
      self.debug(0, "Error! Mask data type not understood")
      raise ValueError
    self.texMask.set_array(self.maskArray)

  def set_disp(self, x) -> None:
    """Sets the value of the parameters, from a numpy array or a GPUArray.

    Warning:
      If it is a GPU array, it will NOT be copied.

    Raises:
      ValueError: If `x` is neither a numpy array nor a GPUArray.
    """

    assert x.shape == (self.Nfields,), \
        "Incorrect initialization of the parameters"
    if isinstance(x, gpuarray.GPUArray):
      self.devX = x
    elif isinstance(x, np.ndarray):
      self.devX.set(x)
    else:
      self.debug(0, "Error! Unknown type of data given to "
                    "CorrelStage.set_disp")
      raise ValueError

  def write_diff_file(
      self, path_format: str = "/home/vic/diff/diff{}-{}.png") -> None:
    """Computes the residual with the current parameters and saves it.

    The residual is offset by `128` and saved as an 8 bits image.

    Args:
      path_format: Path of the image to write. It is formatted with the
        number of the stage and the number of calls to :meth:`get_disp`. The
        default value is the historical hard-coded location, give your own
        path to write somewhere else.
    """

    self._makeDiff.prepared_call(self.grid, self.block,
                                 self.devOut.gpudata,
                                 self.devX.gpudata,
                                 self.devFieldsX.gpudata,
                                 self.devFieldsY.gpudata)
    diff = (self.devOut.get() + 128).astype(np.uint8)
    cv2.imwrite(path_format.format(self.num, self.loop), diff)

  def get_disp(self, img_d=None):
    """The method that actually computes the weight of the fields.

    Args:
      img_d: If given, the image to compare with the original. Otherwise the
        last image given to :meth:`set_image` is used.

    Returns:
      The parameters, copied from the device to a numpy array.
    """

    self.debug(3, "Calling main routine")
    self.loop += 1
    # self.mul = 3
    if not self._ready:
      self.debug(2, "Wasn't ready ! Preparing...")
      self.prepare()
    if img_d is not None:
      self.set_image(img_d)
    assert hasattr(self, 'array_d'), \
        "Did not set the image, use set_image() before calling get_disp or " \
        "give the image as parameter."
    self.debug(3, "Computing first diff table")
    self._makeDiff.prepared_call(self.grid, self.block,
                                 self.devOut.gpudata,
                                 self.devX.gpudata,
                                 self.devFieldsX.gpudata,
                                 self.devFieldsY.gpudata)
    self.res = self._leastSquare(self.devOut).get()
    self.debug(3, "res:", self.res / 1e6)

    # Iterating #
    # Note: I know this section is dense and wrappers for kernel calls could
    # have made things clearer, but function calls in python cause a
    # non-negligible overhead and this is the critical part.
    # The comments are here to guide you !
    for i in range(self.nbIter):
      self.debug(3, "Iteration", i)
      for j in range(self.Nfields):
        # Computing the direction of the gradient of each parameters
        self.devVec[j] = self._mulRedKrnl(self.devG[j], self.devOut)
      # Newton method: we multiply the gradient vector by the pre-inverted
      # Hessian, devVec now contains the actual research direction.
      self._dotKrnl(self.devHi, self.devVec,
                    grid=(1, 1), block=(self.Nfields, 1, 1))
      # This line simply adds k times the research direction to devX
      # with a really simple kernel (does self.devX += k*self.devVec)
      self._addKrnl.prepared_call((1, 1), (self.Nfields, 1, 1),
                                  self.devX.gpudata, self.mul,
                                  self.devVec.gpudata)
      # Do not get rid of this condition: it will not change the output but
      # the parameters will be evaluated, this will copy data from the device
      if self.verbose >= 3:
        self.debug(3, "Direction:", self.devVec.get())
        self.debug(3, "New X:", self.devX.get())

      # To get the new residual
      self._makeDiff.prepared_call(self.grid, self.block,
                                   self.devOut.gpudata,
                                   self.devX.gpudata,
                                   self.devFieldsX.gpudata,
                                   self.devFieldsY.gpudata)
      oldres = self.res
      self.res = self._leastSquare(self.devOut).get()
      # If we moved away, revert changes and stop iterating
      if self.res >= oldres:
        self.debug(3, "Diverting from the solution new res={} >= {}!"
                   .format(self.res / 1e6, oldres / 1e6))
        self._addKrnl.prepared_call((1, 1), (self.Nfields, 1, 1),
                                    self.devX.gpudata,
                                    -self.mul,
                                    self.devVec.gpudata)
        self.res = oldres
        # Same as above: only copy the parameters back if they are printed
        if self.verbose >= 3:
          self.debug(3, "Undone: X=", self.devX.get())
        break

      self.debug(3, "res:", self.res / 1e6)
    # self.write_diff_file()
    if self.showDiff:
      cv2.imshow("Residual", (self.devOut.get() + 128).astype(np.uint8))
      cv2.waitKey(1)
    return self.devX.get()
# =======================================================================#
# =                                                                     =#
# =                         Class GPUCorrel:                            =#
# =                                                                     =#
# =======================================================================#
class GPUCorrel:
  """Identify the displacement between two images.

  This class is the core of the Correl block. It is meant to be efficient
  enough to run in real-time.

  It relies on :class:`CorrelStage` to perform correlation on different
  scales.

  Requirements:
    - The computer must have a Nvidia video card with compute capability
      `>= 3.0`
    - `CUDA 5.0` or higher (only tested with `CUDA 7.5`)
    - `pycuda 2014.1` or higher (only tested with pycuda `2016.1.1`)

  Presentation:
    This class takes a :obj:`list` of fields. These fields will be the base
    of deformation in which the displacement will be identified. When given
    two images, it will identify the displacement between the original and
    the second image in this base as closely as possible lowering
    square-residual using provided displacements.

    This class is highly flexible and performs on GPU for faster operation.

  Usage:
    At initialization, Correl needs only one unnamed argument: the working
    resolution (as a :obj:`tuple` of :obj:`int`), which is the resolution of
    the images it will be given. All the images must have exactly these
    dimensions. The dimensions must be given in this order: `(y,x)` (like
    `openCV` images)

    At initialization or after, this class takes a reference image. The
    deformations on this image are supposed to be all equal to `0`.

    It also needs a number of deformation fields (technically limited to
    `~500` fields, probably much less depending on the resolution and the
    amount of memory on the graphics card).

    Finally, you need to provide the deformed image you want to process. It
    will then identify parameters of the sum of fields that lowers the square
    sum of differences between the original image and the second one
    displaced with the resulting field.

    This class will resample the images and perform identification on a
    lower resolution, use the result to initialize the next stage, and again
    until it reaches the last stage. It will then return the computed
    parameters. The number of levels can be set with ``levels=x``.

    The latest parameters returned (if any) are used to initialize
    computation when called again, to help identify large displacement. It is
    particularly adapted to slow deformations.

    To lower the residual, this program computes the gradient of each
    parameter and uses Newton method to converge as fast as possible. The
    number of iterations for the resolution can also be set.

  Args:
    img_size (:obj:`tuple`): tuple of 2 :obj:`int`, `(y,x)`, the working
      resolution

    verbose (:obj:`int`): Use ``verbose=x`` to choose the amount of
      information printed to the console:

      - `0`: Nothing except for errors
      - `1`: Only important info and warnings
      - `2`: Major info and a few values periodically (at a bearable rate)
      - `3`: Tons of info including details of each iteration

      Note that `verbose=3` REALLY slows the program down. To be used only
      for debug.

    fields (:obj:`list`): Use ``fields=[...]`` to set the fields. This can be
      done later with :meth:`set_fields`, however in case when the fields are
      set later, you need to add ``Nfields=x`` to specify at :meth:`__init__`
      the number of expected fields in order to allocate all the necessary
      memory on the device.

      The fields should be given as a :obj:`list` of :obj:`tuple` of 2
      `numpy.ndarrays` or `gpuarray.GPUArray` of the size of the image, each
      array corresponds to the displacement in pixel along respectively `X`
      and `Y`.

      You can also use a :obj:`str` instead of the :obj:`tuple` for the
      common fields:

      - Rigid body and linear deformations:

        - `'x'`: Movement along `X`
        - `'y'`: Movement along `Y`
        - `'r'`: Rotation (in the trigonometric direction)
        - `'exx'`: Stretch along `X`
        - `'eyy'`: Stretch along `Y`
        - `'exy'`: Shear
        - `'z'`: Zoom (dilatation) (`=exx+eyy`)

        Note that you should not try to identify `exx`, `eyy` AND `z` at the
        same time (one of them is redundant).

      - Quadratic deformations:

        These fields are more complicated to interpret but can be useful for
        complicated solicitations such as biaxial stretch. `U` and `V`
        represent the displacement along respectively `x` and `y`.

        - `'uxx'`: `U(x,y) = x²`
        - `'uyy'`: `U(x,y) = y²`
        - `'uxy'`: `U(x,y) = xy`
        - `'vxx'`: `V(x,y) = x²`
        - `'vyy'`: `V(x,y) = y²`
        - `'vxy'`: `V(x,y) = xy`

      All of these default fields are normalized to have a max displacement
      of `1` pixel and are centered in the middle of the image. They are
      generated to have the size of your image.

      You can mix strings and tuples at your convenience to perform your
      identification.

      Example:
        ::

          fields=['x', 'y', (MyFieldX, MyFieldY)]

        where `MyfieldX` and `MyfieldY` are numpy arrays with the same shape
        as the images

        Example of memory usage: On a 2048x2048 image, count roughly
        `180 + 100*Nfields` MB of VRAM

    img: The original image. It must be given as a 2D `numpy.ndarray`. This
      block works with `dtype=np.float32`. If the `dtype` of the given image
      is different, it will print a warning and the image will be converted.
      It can be given at :meth:`__init__` with the kwarg ``img=MyImage`` or
      later with ``set_orig(MyImage)``.

      Note:
        You can reset it whenever you want, even multiple times but it will
        reset the def parameters to `0`.

      Once fields and original image are set, there is a short preparation
      time before correlation can be performed. You can do this preparation
      yourself by using :meth:`prepare`. If not called, it will be done
      automatically when necessary, inducing a slight overhead at the first
      call of :meth:`get_disp` after setting/updating the fields or original
      image.

    levels (:obj:`int`, optional): Number of levels of the pyramid. More
      levels can help converging with large and quick deformations but may
      fail on images without low spatial frequency. Fewer levels mean that
      the program will run faster.

    resampling_factor (:obj:`float`, optional): The resolution will be
      divided by this parameter between each stage of the pyramid. Low, can
      allow coherence between stages but is more expensive. High, reaches
      small resolutions in less levels and is faster but be careful not to
      lose consistency between stages.

    iterations (:obj:`int`, optional): The MAXIMUM number of iterations to be
      run before returning the values. Note that if the residual increases
      before reaching `x` iterations, the block will return anyway.

    mask (optional): To set the mask, to weight the zone of interest on the
      images. It is particularly useful to prevent undesired effects on the
      border of the images. If no mask is given, a rectangular mask will be
      used, with border of `5%` the size of the image.

    show_diff (:obj:`bool`, optional): Will open a :mod:`cv2` window and
      print the difference between the original and the displaced image after
      correlation. `128 Gray` means no difference, lighter means positive and
      darker negative.

    kernel_file (:obj:`str`, optional): Path to the file containing the
      kernels. If not given, the ``tool/kernels.cu`` file located in
      `crappy_install_dir` is used, where `crappy_install_dir` is the root
      directory of the installation of crappy (``crappy.__path__``).

    mul (:obj:`float`, optional): This parameter is critical. The direction
      will be multiplied by this scalar before being added to the solution.
      It defines how "fast" we move towards the solution. High value, fast
      convergence but risk to go past the solution and diverge (the program
      does not try to handle this and if the residual rises, iterations will
      stop immediately). Low value, probably more precise but slower and may
      require more iterations.

      After multiple tests, 3 was found to be a pretty acceptable value.
      Don't hesitate to adapt it to your case. Use ``verbose=3`` and see if
      the convergence is too slow or too fast.

    context (optional): The CUDA context to use. If not given, one is created
      with `pycuda.tools.make_default_context`.

  Note:
    The compared image can be given directly when querying the displacement
    as a parameter to :meth:`get_disp` or before, with :meth:`set_image`. You
    can provide it as a `np.ndarray` just like `orig`, or as a
    `pycuda.gpuarray.GPUArray`.
  """

  # Todo
  """
    This section lists all the considered improvements for this program.
    These features may NOT all be implemented in the future.
    They are sorted by priority.
    - Allow faster execution by executing the reduction only on a part
      of the images (random or chosen)
    - Add the possibility to return the value of the deformation `Exx` and
      `Eyy` in a specific point
    - Add a parameter to return values in `%`
    - Add a filter to smooth/ignore incorrect values
    - Allow a reset of the reference picture for simple deformations
      to enhance robustness in case of large deformations or lighting
      changes
    - Restart iterating from `0` once in a while to see if the residual is
      lower. Can be useful to recover when diverged critically due to an
      incorrect image (Shadow, obstruction, flash, camera failure, ...)
  """

  def __init__(self, img_size: tuple, **kwargs) -> None:
    """Creates the context, the stages of the pyramid and the field textures.

    See the docstring of the class for the description of the arguments.

    Raises:
      ValueError: If neither ``Nfields`` nor ``fields`` is given.
    """

    global context
    if 'context' in kwargs:
      context = kwargs.pop('context')
    else:
      cuda.init()
      try:
        from pycuda.tools import make_default_context
      except (ImportError, ModuleNotFoundError):
        make_default_context = OptionalModule("pycuda",
                                              "PyCUDA and CUDA are necessary "
                                              "to use GPUCorrel")
      context = make_default_context()

    known = ('verbose', 'levels', 'resampling_factor', 'kernel_file',
             'iterations', 'show_diff', 'Nfields', 'img', 'fields', 'mask',
             'mul')
    unknown = [k for k in kwargs if k not in known]
    if unknown:
      warnings.warn("Unrecognized parameter" + (
          's: ' + str(unknown) if len(unknown) > 1 else ': ' + unknown[0]),
                    SyntaxWarning)
    self.verbose = kwargs.get("verbose", 0)
    self.debug(3, "You set the verbose level to the maximum.\n"
                  "It may help finding bugs or tracking errors but it may "
                  "also impact the program performance as it will print A "
                  "LOT of output and add GPU->CPU copies only to print "
                  "information.\n"
                  "If it is not desired, consider lowering the verbosity: "
                  "1 or 2 is a reasonable choice, "
                  "0 won't show anything except for errors.")
    self.levels = kwargs.get("levels", 5)
    self.loop = 0
    self.resamplingFactor = kwargs.get("resampling_factor", 2)
    h, w = img_size
    self.nbIter = kwargs.get("iterations", 4)
    self.debug(1, "Initializing... Master resolution:", img_size,
               "levels:", self.levels, "verbosity:", self.verbose)

    # Computing dimensions of the different levels #
    self.h = [int(round(h / (self.resamplingFactor ** i)))
              for i in range(self.levels)]
    self.w = [int(round(w / (self.resamplingFactor ** i)))
              for i in range(self.levels)]

    if kwargs.get("Nfields") is not None:
      self.Nfields = kwargs.get("Nfields")
    elif kwargs.get("fields") is not None:
      self.Nfields = len(kwargs["fields"])
    else:
      self.debug(0, "Error! You must provide the number of fields at init. "
                    "Add Nfields=x or directly set fields with "
                    "fields=list/tuple")
      raise ValueError

    kernel_file = kwargs.get("kernel_file")
    if kernel_file is None:
      self.debug(3, "Kernel file not specified, using the one in crappy dir")
      kernel_file = resource_filename('crappy', 'tool/kernels.cu')
    self.debug(3, "Kernel file:", kernel_file)

    # Creating a new instance of CorrelStage for each stage #
    # Only the stage at full resolution (index 0) may display the residual
    self.correl = []
    for i in range(self.levels):
      self.correl.append(CorrelStage((self.h[i], self.w[i]),
                                     verbose=self.verbose,
                                     Nfields=self.Nfields,
                                     iterations=self.nbIter,
                                     show_diff=(i == 0 and kwargs.get(
                                         "show_diff", False)),
                                     mul=kwargs.get("mul", 3),
                                     kernel_file=kernel_file))

    # Set original image if provided #
    if kwargs.get("img") is not None:
      self.set_orig(kwargs.get("img"))

    # One pair of textures and one resampling kernel are generated per field
    s = """
    texture<float, cudaTextureType2D, cudaReadModeElementType> texFx{0};
    texture<float, cudaTextureType2D, cudaReadModeElementType> texFy{0};
    __global__ void resample{0}(float* outX, float* outY, int x, int y)
    {{
      int idx = blockIdx.x*blockDim.x+threadIdx.x;
      int idy = blockIdx.y*blockDim.y+threadIdx.y;
      if(idx < x && idy < y)
      {{
        outX[idy*x+idx] = tex2D(texFx{0},(float)idx/x, (float)idy/y);
        outY[idy*x+idx] = tex2D(texFy{0},(float)idx/x, (float)idy/y);
      }}
    }}
    """
    # Adding textures for the quick fields resampling
    self.src = "".join(s.format(i) for i in range(self.Nfields))

    self.mod = SourceModule(self.src)

    self.texFx = []
    self.texFy = []
    self.resampleF = []
    for i in range(self.Nfields):
      self.texFx.append(self.mod.get_texref("texFx%d" % i))
      self.texFy.append(self.mod.get_texref("texFy%d" % i))
      self.resampleF.append(self.mod.get_function("resample%d" % i))
      self.resampleF[i].prepare("PPii", texrefs=[self.texFx[i],
                                                 self.texFy[i]])

    for t in self.texFx + self.texFy:
      t.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
      t.set_filter_mode(cuda.filter_mode.LINEAR)
      t.set_address_mode(0, cuda.address_mode.BORDER)
      t.set_address_mode(1, cuda.address_mode.BORDER)

    # Set fields if provided #
    if kwargs.get("fields") is not None:
      self.set_fields(kwargs.get("fields"))

    if kwargs.get("mask") is not None:
      self.set_mask(kwargs.get("mask"))

  def get_fields(self, y: int = None, x: int = None) -> tuple:
    """Returns the fields, resampled to size `(y,x)`.

    If `x` or `y` is not given, the size of the first stage is used.

    Returns:
      A :obj:`tuple` of two `GPUArray` of shape `(Nfields, y, x)`, with the
      fields along `X` and the fields along `Y`.
    """

    if x is None or y is None:
      y = self.h[0]
      x = self.w[0]
    out_x = gpuarray.empty((self.Nfields, y, x), np.float32)
    out_y = gpuarray.empty((self.Nfields, y, x), np.float32)
    grid = (int(ceil(x / 32)), int(ceil(y / 32)))
    block = (int(ceil(x / grid[0])), int(ceil(y / grid[1])), 1)
    for i in range(self.Nfields):
      self.resampleF[i].prepared_call(grid, block,
                                      out_x[i, :, :].gpudata,
                                      out_y[i, :, :].gpudata,
                                      np.int32(x), np.int32(y))
    return out_x, out_y

  def debug(self, n: int, *s: Any) -> None:
    """To print debug info.

    First argument is the level of the message. It will be displayed only if
    `self.verbose` is superior or equal.
    """

    if n <= self.verbose:
      print(" " * (n - 1) + "[Correl]", *s)

  def set_orig(self, img) -> None:
    """To set the original image.

    This is the reference with which the second image will be compared. It is
    given to the first stage, then resampled from each stage to the next one.
    """

    self.debug(2, "updating original image")
    assert isinstance(img, np.ndarray), "Image must be a numpy array"
    assert len(img.shape) == 2, "Image must have 2 dimensions (got {})" \
        .format(len(img.shape))
    assert img.shape == (self.h[0], self.w[0]), "Wrong size!"
    if img.dtype != np.float32:
      warnings.warn("Correl() takes arrays with dtype np.float32 to allow GPU "
                    "computing (got {}). Converting to float32."
                    .format(img.dtype), RuntimeWarning)
      img = img.astype(np.float32)

    self.correl[0].set_orig(img)
    for i in range(1, self.levels):
      self.correl[i - 1].resample_orig(self.h[i], self.w[i],
                                       self.correl[i].devOrig)
      self.correl[i].update_orig()

  def set_fields(self, fields: list) -> None:
    """Sets the fields of every stage of the pyramid.

    Args:
      fields: A :obj:`list` or :obj:`tuple` with one item per field. Each
        item is either the name of a default field (:obj:`str`) or a pair of
        arrays with the displacement along `X` and along `Y`. The given
        sequence is left untouched.

    Raises:
      ValueError: If the type of the first field is not recognized.
    """

    assert self.Nfields == len(fields), \
        "Cannot change the number of fields on the go!"
    # Working on a copy: the names are replaced by the generated arrays
    # below, which must neither alter the argument of the caller nor fail
    # when a tuple is given
    fields = list(fields)
    # Choosing the right function to copy
    if isinstance(fields[0], str) or isinstance(fields[0][0], np.ndarray):
      to_array = cuda.matrix_to_array
    elif isinstance(fields[0][0], gpuarray.GPUArray):
      to_array = cuda.gpuarray_to_array
    else:
      self.debug(0, "Error ! Incorrect fields argument. "
                    "See docstring of Correl")
      raise ValueError
    # These list store the arrays for the fields texture
    # (to be interpolated quickly for each stage)
    self.fieldsXArray = []
    self.fieldsYArray = []
    for i in range(self.Nfields):
      if isinstance(fields[i], str):
        fields[i] = get_field(fields[i].lower(), self.h[0], self.w[0])

      self.fieldsXArray.append(to_array(fields[i][0], "C"))
      self.texFx[i].set_array(self.fieldsXArray[i])
      self.fieldsYArray.append(to_array(fields[i][1], "C"))
      self.texFy[i].set_array(self.fieldsYArray[i])
    for i in range(self.levels):
      self.correl[i].set_fields(*self.get_fields(self.h[i], self.w[i]))

  def prepare(self) -> None:
    """Prepares every stage of the pyramid for the correlation."""

    for c in self.correl:
      c.prepare()
    self.debug(2, "Ready!")

  def save_all_images(self, name: str = "out") -> None:
    """Saves the original image of each stage to `<name><level>.png`."""

    self.debug(1, "Saving all images with the name", name + "X.png")
    for i in range(self.levels):
      out = self.correl[i].devOrig.get().astype(np.uint8)
      cv2.imwrite(name + str(i) + ".png", out)

  def set_image(self, img_d) -> None:
    """Sets the image to compare with the original, on every stage.

    The image is converted to `float32` if needed (with a warning), given to
    the first stage, then resampled from each stage to the next one.
    """

    if img_d.dtype != np.float32:
      warnings.warn("Correl() takes arrays with dtype np.float32 "
                    "to allow GPU computing (got {}). Converting to float32."
                    .format(img_d.dtype), RuntimeWarning)
      img_d = img_d.astype(np.float32)
    self.correl[0].set_image(img_d)
    for i in range(1, self.levels):
      self.correl[i].set_image(
        self.correl[i - 1].resample_d(self.correl[i].h, self.correl[i].w))

  def set_mask(self, mask) -> None:
    """Sets the mask of every stage, resampled to the size of the stage."""

    for i in range(self.levels):
      self.correl[i].set_mask(interp_nearest(mask, self.h[i], self.w[i]))

  def get_disp(self, img_d=None):
    """To get the displacement.

    This will perform the correlation routine on each stage, initializing
    with the previous values every time it will return the computed
    parameters as a list.
    """

    self.loop += 1
    if img_d is not None:
      self.set_image(img_d)
    try:
      # Scaling the last result down to below the smallest stage, it is
      # scaled back up by one factor before each stage
      disp = self.last / (self.resamplingFactor ** self.levels)
    except AttributeError:
      # First call: there is no previous result to start from
      disp = np.array([0] * self.Nfields, dtype=np.float32)
    # Going from the smallest stage to the full resolution one
    for i in reversed(range(self.levels)):
      disp *= self.resamplingFactor
      self.correl[i].set_disp(disp)
      disp = self.correl[i].get_disp()
      self.last = disp
    # Every 10 images, print the values (if debug >=2)
    # The verbosity is checked here so that the parameters are only copied
    # from the device when they are actually printed
    if self.loop % 10 == 0 and self.verbose >= 2:
      self.debug(2, "Loop", self.loop, ", values:",
                 self.correl[0].devX.get(), ", res:",
                 self.correl[0].res / 1e6)
    return disp

  def get_res(self, lvl: int = 0):
    """Returns the last residual of the specified level (`0` by default).

    Usually, the correlation is correct when `res < ~1e9-10` but it really
    depends on the images: you need to find the value that suit your own
    images, depending on the resolution, contrast, correlation method etc...
    You can use :meth:`write_diff_file` to visualize the difference between
    the two images after correlation.
    """

    return self.correl[lvl].res

  def write_diff_file(self, level: int = 0):
    """To see the difference between the two images with the computed
    parameters.

    It writes a single channel picture named `"diff.png"` where `128 gray` is
    exact equality, lighter pixels show positive difference and darker pixels
    a negative difference. Useful to see if correlation succeeded and to
    identify the origin of non convergence.
    """

    self.correl[level].write_diff_file()

  @staticmethod
  def clean():
    """Needs to be called at the end, to destroy the context properly.

    Does nothing if no context was ever created.
    """

    if context is not None:
      context.pop()