diff --git a/zUtil_DataStructures/GtCellBenchmarks.m b/zUtil_DataStructures/GtCellBenchmarks.m
deleted file mode 100644
index 745115352d56dbd550436ce1d79060c1dc8a4999..0000000000000000000000000000000000000000
--- a/zUtil_DataStructures/GtCellBenchmarks.m
+++ /dev/null
@@ -1,306 +0,0 @@
-classdef GtCellBenchmarks < GtBenchmarks
-    methods (Static)
-        function printHeader(funcName, dataSize)
-            fprintf('\n%- 40s (Data size: %f GB)\n', funcName, dataSize / 2^30);
-        end
-
-        function printResult(message, result, dataVols)
-            fprintf([message '\t'], result);
-            if (exist('dataVols', 'var'))
-                fprintf('Throughput: %f GB/s\n', dataVols / 2^30 / result);
-            end
-        end
-
-        function arrays = getArrays(numVols, sizeVols, precision)
-            if (~exist('precision', 'var'))
-                precision = 'double';
-            end
-            arrays = cell(1, numVols);
-            for ii = 1:numVols
-                arrays{ii} = rand(sizeVols, sizeVols, sizeVols, precision);
-            end
-        end
-
-        function bytes = getSizeVariable(var)
-            info = whos('var');
-            bytes = info.bytes;
-        end
-
-        function verifyError(reference, computed)
-            fprintf('Checking errors in computation:\n')
-            if (iscell(reference))
-                fprintf('\t%g\n', max(cellfun(@(x, y)max(max(max(abs(x - y)))), reference, computed)));
-            elseif (isscalar(reference))
-                fprintf('\t%g (%g relative)\n', abs(reference - computed), abs((reference - computed)/reference) );
-            else
-                fprintf('\t%g\n', max(max(max(abs(reference - computed)))) );
-            end
-        end
-
-        function newVols = benchmarkCopy(vols)
-            bytes = GtBenchmarks.getSizeVariable(vols);
-            %internal_cell_copy.cpp
-            GtBenchmarks.printHeader('internal_cell_copy', bytes);
-            tic();
-            newVols = internal_cell_copy(vols);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-            for ii = 1:length(newVols)
-                newVols{ii} = permute(newVols{ii}, [2 1 3]);
-            end
-        end
-
-        function benchmarkDiv(vols, newVols)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            %internal_cell_div_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_div_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_div_assign(dummy2, newVols);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_div_copy(dummy3, newVols);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x ./ y}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} ./ newVols{ii};
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkProd(vols, newVols)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_prod_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_prod_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_prod_assign(dummy2, newVols);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_prod_copy(dummy3, newVols);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x .* y}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} .* newVols{ii};
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkProdScalar(vols, beta)
-            bytes = GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_prod_scalar_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_prod_scalar_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_prod_scalar_assign(dummy2, beta);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_prod_scalar_copy(dummy3, beta);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x){x * beta}, dummy);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} * beta;
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkSumScalar(vols, beta)
-            bytes = GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_sum_scalar_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_sum_scalar_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_sum_scalar_assign(dummy2, beta);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_sum_scalar_copy(dummy3, beta);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x){x + beta}, dummy);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} + beta;
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkSum(vols, newVols)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_sum_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_sum_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_sum_assign(dummy2, newVols);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_sum_copy(dummy3, newVols);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x + y}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} + newVols{ii};
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkSub(vols, newVols)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_sum_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_sub_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_sub_assign(dummy2, newVols);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_sub_copy(dummy3, newVols);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x - y}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} - newVols{ii};
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkScaledSum(vols, newVols, beta)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_sum_scaled_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_sum_scaled_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_sum_scaled_assign(dummy2, newVols, beta);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_sum_scaled_copy(dummy3, newVols, beta);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x + y * beta}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} + newVols{ii} * beta;
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function benchmarkScaledSub(vols, newVols, beta)
-            bytes = 2 * GtBenchmarks.getSizeVariable(vols);
-            % internal_cell_sub_scaled_assign.cpp
-            GtBenchmarks.printHeader('internal_cell_sub_scaled_[assign, copy]', bytes);
-            dummy2 = internal_cell_copy(vols);
-            tic();
-            dummy2 = internal_cell_sub_scaled_assign(dummy2, newVols, beta);
-            GtBenchmarks.printResult('C (ass):  %f', toc(), bytes);
-            dummy3 = internal_cell_copy(vols);
-            tic();
-            dummy3 = internal_cell_sub_scaled_copy(dummy3, newVols, beta);
-            GtBenchmarks.printResult('C (copy): %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            dummy = cellfun(@(x, y){x - y * beta}, dummy, newVols);
-            GtBenchmarks.printResult('cellfun:  %f', toc(), bytes);
-
-            dummy = vols;
-            tic();
-            for ii = 1:length(dummy)
-                dummy{ii} = dummy{ii} - newVols{ii} * beta;
-            end
-            GtBenchmarks.printResult('for loop: %f', toc(), bytes);
-
-            GtBenchmarks.verifyError(dummy, dummy2);
-            GtBenchmarks.verifyError(dummy, dummy3);
-        end
-
-        function allBenchmarks(vols)
-            beta = 2.0;
-
-            newVols = GtCellBenchmarks.benchmarkCopy(vols);
-
-            GtCellBenchmarks.benchmarkDiv(vols, newVols);
-
-            GtCellBenchmarks.benchmarkProd(vols, newVols);
-
-            GtCellBenchmarks.benchmarkProdScalar(vols, beta);
-
-            GtCellBenchmarks.benchmarkSum(vols, newVols);
-
-            GtCellBenchmarks.benchmarkSumScalar(vols, beta);
-
-            GtCellBenchmarks.benchmarkSub(vols, newVols);
-
-            GtCellBenchmarks.benchmarkScaledSum(vols, newVols, beta);
-
-            GtCellBenchmarks.benchmarkScaledSub(vols, newVols, beta);
-        end
-    end
-end
diff --git a/zUtil_Deformation/Gt6DAlgoBenchmarks.m b/zUtil_Deformation/Gt6DAlgoBenchmarks.m
index 0d53a2ca6422b5e5ae01c96e0035f686ed21cf4a..b6bff1a78c8a525eaf14892948846dacc263e5d9 100644
--- a/zUtil_Deformation/Gt6DAlgoBenchmarks.m
+++ b/zUtil_Deformation/Gt6DAlgoBenchmarks.m
@@ -23,7 +23,7 @@ classdef Gt6DAlgoBenchmarks < GtBenchmarks
             fprintf(' - Doing %d safe copies: ', numIters)
             copy_p = cell(1, numIters);
             for ii = 1:numIters
-                copy_p{ii} = internal_cell_copy(p);
+                copy_p{ii} = gtCxxMathsCellCopy(p);
             end
             fprintf(' Done.\n')
 
@@ -62,7 +62,7 @@ classdef Gt6DAlgoBenchmarks < GtBenchmarks
             fprintf(' - Doing %d safe copies: ', numIters)
             copy_q = cell(1, numIters);
             for ii = 1:numIters
-                copy_q{ii} = internal_cell_copy(q);
+                copy_q{ii} = gtCxxMathsCellCopy(q);
             end
             fprintf(' Done.\n')
 
diff --git a/zUtil_Deformation/Gt6DBlobReconstructor.m b/zUtil_Deformation/Gt6DBlobReconstructor.m
index 8747ac178df8d1320ec7170b2217ecb1dff67aa4..12234a7794758d06b7dae74455e5a61e32e34aec 100644
--- a/zUtil_Deformation/Gt6DBlobReconstructor.m
+++ b/zUtil_Deformation/Gt6DBlobReconstructor.m
@@ -257,11 +257,11 @@ classdef Gt6DBlobReconstructor < Gt6DVolumeToBlobProjector
                         % q_l1 is just a volume of zeros
                         compute_update_primal = @(ii){};
                     case '6DL1'
-                        compute_update_primal = @(ii)internal_cell_prod_scalar_copy(q_l1(ii), conf.lambda_l1);
+                        compute_update_primal = @(ii)gtCxxMathsCellTimes(q_l1(ii), conf.lambda_l1, 'threads', self.num_threads);
                     case '6DTV'
                         compute_update_primal = @(ii)get_divergence(self, mdiv_tv, ii);
                     case '6DTVL1'
-                        compute_update_primal = @(ii)[get_divergence(self, mdiv_tv, ii), internal_cell_prod_scalar_copy(q_l1(ii), conf.lambda_l1)];
+                        compute_update_primal = @(ii)[get_divergence(self, mdiv_tv, ii), gtCxxMathsCellTimes(q_l1(ii), conf.lambda_l1, 'threads', self.num_threads)];
                 end
 
                 % Computing update dual ODF
@@ -500,17 +500,17 @@ classdef Gt6DBlobReconstructor < Gt6DVolumeToBlobProjector
                         temp_bls = 4 .* sigma{n} .* self.blobs{n};
                         temp_d = (temp_p - 1) .^ 2 + temp_bls;
                         temp_d = sqrt(temp_d);
-                        p{n} = (1 + temp_p - temp_d) / 2;
+                        p{n} = (1 + temp_p - temp_d) * 0.5;
                     end
                 case 'l2'
                     if (self.algo_ops_c_functions)
                         p = gt6DUpdateDualDetector_c(p, self.blobs, proj_bls, sigma, sigma_1, self.num_threads);
 
                         % % Or equivalently
-                        % proj_bls = internal_cell_sub_assign(proj_bls, bls);
-                        % proj_bls = internal_cell_prod_assign(proj_bls, sigma1);
-                        % p = internal_cell_sum_assign(p, proj_bls);
-                        % p = internal_cell_prod_assign(p, sigma1_1);
+                        % proj_bls = gtCxxMathsCellMinus(proj_bls, bls, 'copy', false, 'threads', self.num_threads);
+                        % proj_bls = gtCxxMathsCellTimes(proj_bls, sigma1, 'copy', false, 'threads', self.num_threads);
+                        % p = gtCxxMathsCellPlus(p, proj_bls, 'copy', false, 'threads', self.num_threads);
+                        % p = gtCxxMathsCellTimes(p, sigma1_1, 'copy', false, 'threads', self.num_threads);
                     else
                         for n = 1:numel(p)
                             p{n} = p{n} + sigma{n} .* (proj_bls{n} - self.blobs{n});
@@ -717,7 +717,7 @@ classdef Gt6DBlobReconstructor < Gt6DVolumeToBlobProjector
             do_l1_update = ~isempty(strfind(upper(algo), 'L1'));
             do_ls_update = ~isempty(strfind(upper(algo), 'LS'));
 
-            p1 = internal_cell_copy(self.currentSolution);
+            p1 = gtCxxMathsCellCopy(self.currentSolution, 'threads', self.num_threads);
 
             if (do_l1_update)
                 q_l1 = gtMathsGetSameSizeZeros(self.currentSolution);
@@ -781,7 +781,7 @@ classdef Gt6DBlobReconstructor < Gt6DVolumeToBlobProjector
 
             if (any(strcmpi(algo, {'6DLS', '6DL1'})))
                 proj_bls = self.compute_fwd_projection(self.currentSolution, false);
-                proj_bls = internal_cell_sub_assign(proj_bls, self.blobs);
+                proj_bls = gtCxxMathsCellMinus(proj_bls, self.blobs, 'copy', false, 'threads', self.num_threads);
                 value = value + gtMathsNorm_l2(proj_bls);
             end
 
diff --git a/zUtil_Maths/gtMathsCGSolveCell.m b/zUtil_Maths/gtMathsCGSolveCell.m
index 93ec86bfd4eed444d9617f287227a9b76f727860..57aa84c70635759a9bd50132c0fc3d112a0b6672 100644
--- a/zUtil_Maths/gtMathsCGSolveCell.m
+++ b/zUtil_Maths/gtMathsCGSolveCell.m
@@ -30,6 +30,12 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 %       - 5: Bad Hessian: non-posite definite!
 %
 
+    try
+        num_threads = feature('NumCores');
+    catch
+        num_threads = 1;
+    end
+
     if (~exist('precond', 'var') || isempty(precond))
         precond = [];
     elseif (isnumeric(precond))
@@ -86,17 +92,17 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
         end
         % Initial residual which is exactly the b vector, since z = 0
         if (use_c_functions)
-            r = internal_cell_copy(b);
+            r = gtCxxMathsCellCopy(b, 'threads', num_threads);
         else
             r = b;
         end
     else
         if (use_c_functions)
-            z = internal_cell_copy(x0);
+            z = gtCxxMathsCellCopy(x0, 'threads', num_threads);
         else
             z = x0;
         end
-        r = measureResidual(A, b, z, use_c_functions);
+        r = measureResidual(A, b, z, use_c_functions, num_threads);
     end
 
     % Computing preconditioner
@@ -113,7 +119,7 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 
     % First correction is gradient step
     if (use_c_functions)
-        d = internal_cell_prod_scalar_copy(y, -1.0);
+        d = gtCxxMathsCellTimes(y, -1.0, 'threads', num_threads);
     else
         d = cell(1, numCells);
         for ii = 1:numCells
@@ -157,7 +163,7 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
         end
         % Update the solution
         if (use_c_functions)
-            z = internal_cell_sub_scaled_assign(z, d, alpha);
+            z = gtCxxMathsCellMinus(z, d, 'scale', alpha, 'copy', false, 'threads', num_threads);
         else
             for ii = 1:numCells
                 z{ii} = z{ii} - alpha * d{ii};
@@ -166,11 +172,11 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 
         % Evaluation of the residual
         if (mod(jj+1, 50) == 0) && false % <-- Let's disable for now
-            r = measureResidual(A, b, z, use_c_functions);
+            r = measureResidual(A, b, z, use_c_functions, num_threads);
         else
             % Let's update the residual
             if (use_c_functions)
-                r = internal_cell_sum_scaled_assign(r, q, alpha);
+                r = gtCxxMathsCellPlus(r, q, 'scale', alpha, 'copy', false, 'threads', num_threads);
             else
                 for ii = 1:numCells
                     r{ii} = r{ii} + alpha * q{ii};
@@ -232,8 +238,8 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 
         % Update the correction to apply at the next iteration
         if (use_c_functions)
-            d = internal_cell_prod_scalar_assign(d, beta);
-            d = internal_cell_sub_assign(d, y);
+            d = gtCxxMathsCellTimes(d, beta, 'copy', false, 'threads', num_threads);
+            d = gtCxxMathsCellMinus(d, y, 'copy', false, 'threads', num_threads);
         else
             for ii = 1:numCells
                 d{ii} = -y{ii} + beta * d{ii};
@@ -283,12 +289,13 @@ function is_finite = isFinite(x)
     end
 end
 
-function r = measureResidual(A, b, z, use_c_functions)
+function r = measureResidual(A, b, z, use_c_functions, num_threads)
 % Evaluation of the true residual
     numCells = length(b);
     tempAz = A(z);
+    r = cell(size(b));
     if (use_c_functions)
-        r = internal_cell_sub_copy(b, tempAz);
+        r = gtCxxMathsCellMinus(b, tempAz, 'threads', num_threads);
     else
         for ii = 1:numCells
             r{ii} = b{ii} - tempAz{ii};
diff --git a/zUtil_Maths/gtMathsSIRTSolveCell.m b/zUtil_Maths/gtMathsSIRTSolveCell.m
index d913875e91c26b15c043d75f8756de2f46930caa..266144eb869213c55327108ef6cc5e49b193a38e 100644
--- a/zUtil_Maths/gtMathsSIRTSolveCell.m
+++ b/zUtil_Maths/gtMathsSIRTSolveCell.m
@@ -28,6 +28,12 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 %       - 4: One of the scalar quantities became either too big or too small
 %
 
+    try
+        num_threads = feature('NumCores');
+    catch
+        num_threads = 1;
+    end
+
     if (~exist('verbose', 'var'))
         out = GtConditionalOutput(false);
     else
@@ -89,11 +95,11 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 
     % Solution to the system
     if (use_c_functions)
-        z = internal_cell_copy(x0);
+        z = gtCxxMathsCellCopy(x0, 'threads', num_threads);
     else
         z = x0;
     end
-    r = measureResidual(A, b, z, use_c_functions);
+    r = measureResidual(A, b, z, use_c_functions, num_threads);
     nextSquareResidual = gtMathsSquareNorm(r);
 
     % Best solutions in the iterations (just in case we run till the end)
@@ -112,7 +118,7 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
 
         % Update the solution
         if (use_c_functions)
-            z = internal_cell_sum_assign(z, d);
+            z = gtCxxMathsCellPlus(z, d, 'copy', false, 'threads', num_threads);
         else
             for ii = 1:numCells
                 z{ii} = z{ii} + d{ii};
@@ -120,7 +126,7 @@ function [bestSol, flag, bestRes, bestIter, squareResiduals] = ...
         end
 
         % Evaluation of the residual
-        r = measureResidual(A, b, z, use_c_functions);
+        r = measureResidual(A, b, z, use_c_functions, num_threads);
 
         % computing beta and the weights
         nextSquareResidual = gtMathsSquareNorm(r);
@@ -176,12 +182,13 @@ function is_finite = isFinite(x)
     end
 end
 
-function r = measureResidual(A, b, z, use_c_functions)
+function r = measureResidual(A, b, z, use_c_functions, num_threads)
 % Evaluation of the true residual
     numCells = length(b);
     tempAz = A(z);
+    r = cell(size(b));
     if (use_c_functions)
-        r = internal_cell_sub_copy(b, tempAz);
+        r = gtCxxMathsCellMinus(b, tempAz, 'threads', num_threads);
     else
         for ii = 1:numCells
             r{ii} = b{ii} - tempAz{ii};
@@ -189,11 +196,11 @@ function r = measureResidual(A, b, z, use_c_functions)
     end
 end
 
-function p = backProjectResidual(At, r, Aweights, Atweights, use_c_functions)
+function p = backProjectResidual(At, r, Aweights, Atweights, use_c_functions, num_threads)
     if (use_c_functions)
-        p = internal_cell_div_copy(r, Aweights);
+        p = gtCxxMathsCellDivide(r, Aweights, 'threads', num_threads);
         p = At(p);
-        p = internal_cell_div_assign(p, Atweights);
+        p = gtCxxMathsCellDivide(p, Atweights, 'copy', false, 'threads', num_threads);
     else
         numCells = length(r);
         p = cell(1, numCells);
diff --git a/zUtil_Tests/gtTest_gt6DUpdateDualDetector_c.m b/zUtil_Tests/gtTest_gt6DUpdateDualDetector_c.m
index 93a924cc3c8dca6b10c15d11d18eb045a12f9000..32c8461e8a98734e3c0b3c17447953d353789dda 100644
--- a/zUtil_Tests/gtTest_gt6DUpdateDualDetector_c.m
+++ b/zUtil_Tests/gtTest_gt6DUpdateDualDetector_c.m
@@ -21,7 +21,7 @@ function gtTest_gt6DUpdateDualDetector_c()
         test_sigma{ii} = rand(size_vols, 'single');
     end
 
-    test_dual_correct = internal_cell_copy(test_dual);
+    test_dual_correct = gtCxxMathsCellCopy(test_dual);
 
     test_dual = gt6DUpdateDualDetector_c(test_dual, test_blobs, test_pblobs, test_sigma, test_sigma, num_threads);
 
diff --git a/zUtil_Tests/gtTest_gt6DUpdateDualL1_c.m b/zUtil_Tests/gtTest_gt6DUpdateDualL1_c.m
index 616628fb03c9e2100277b76ae3fb57584ed08bfd..2d73bb890216ac144e1d2f86ecf7a855d33c0093 100644
--- a/zUtil_Tests/gtTest_gt6DUpdateDualL1_c.m
+++ b/zUtil_Tests/gtTest_gt6DUpdateDualL1_c.m
@@ -17,7 +17,7 @@ function gtTest_gt6DUpdateDualL1_c()
         test_dsES{ii} = rand(size_vols, 'single');
     end
 
-    test_dual_correct = internal_cell_copy(test_dual);
+    test_dual_correct = gtCxxMathsCellCopy(test_dual);
 
     test_dual = gt6DUpdateDualL1_c(test_dual, test_dsES, 1, num_threads);
 
diff --git a/zUtil_Tests/gtTest_gt6DUpdatePrimal_c.m b/zUtil_Tests/gtTest_gt6DUpdatePrimal_c.m
index 513bc9af81a21d16d2eaf43f7482f9f82d17029a..6e0e51635e1dea77fb474d420bf50c3f6f12d024 100644
--- a/zUtil_Tests/gtTest_gt6DUpdatePrimal_c.m
+++ b/zUtil_Tests/gtTest_gt6DUpdatePrimal_c.m
@@ -16,7 +16,7 @@ function gtTest_gt6DUpdatePrimal_c()
     tau = 1;
     theta = 1;
 
-    test_primal_correct = internal_cell_copy({test_primal});
+    test_primal_correct = gtCxxMathsCellCopy({test_primal});
     test_primal_correct = test_primal_correct{1};
 
     [test_primal, test_ehn] = gt6DUpdatePrimal_c(test_primal, test_ehn, test_correction, tau, num_threads);