FloatVectorOperations requests findMinimum and findMaximum

Hi Jules, do you think you could add the following two methods to FloatVectorOperations? They maintain completeness with those in MathsFunctions.h and are useful for tasks like metering. Cheers.

[code] /** Finds the minimum value in the given array. */
static float JUCE_CALLTYPE findMinimum (const float* src, int numValues) noexcept;

/** Finds the maximum value in the given array.

    @param src        pointer to the first float to examine
    @param numValues  the number of floats to examine, starting at src
    @returns          the largest of the examined values
*/
static float JUCE_CALLTYPE findMaximum (const float* src, int numValues) noexcept;

[/code]

[code]/** Returns the minimum of the num floats starting at src.

    When built with JUCE_USE_SSE_INTRINSICS, SSE2 is reported as available and
    the input holds at least two complete groups of four floats, the groups are
    reduced four lanes at a time; every other case is handed to
    juce::findMinimum.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
   #if JUCE_USE_SSE_INTRINSICS
    // Number of complete four-float groups in the input.
    const int numLongOps = num / 4;

    if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
    {
        __m128 lowest;

        // Seeds the accumulator from the first group, then folds in each
        // remaining group, advancing src past everything it consumes.
        #define JUCE_MIN_SSE_LOOP(loadOp) \
            lowest = loadOp (src); \
            src += 4; \
            for (int groupsLeft = numLongOps - 1; groupsLeft > 0; --groupsLeft) \
            { \
                lowest = _mm_min_ps (lowest, loadOp (src)); \
                src += 4; \
            }

        // Aligned loads are only used when the helper says src allows them.
        if (FloatVectorHelpers::isAligned (src))
        {
            JUCE_MIN_SSE_LOOP (_mm_load_ps)
        }
        else
        {
            JUCE_MIN_SSE_LOOP (_mm_loadu_ps)
        }

        // Spill the four lanes so they can be reduced to a single float.
        float lanes[4];
        _mm_storeu_ps (lanes, lowest);
        FloatVectorHelpers::mmEmpty();

        float result = jmin (lanes[0], lanes[1], lanes[2], lanes[3]);

        // Fold in the 0-3 values left after the last complete group
        // (src already points at them).
        num &= 3;

        for (int i = 0; i < num; ++i)
            result = jmin (result, src[i]);

        return result;
    }
   #endif

    return juce::findMinimum (src, num);
}

/** Returns the maximum of the num floats starting at src.

    When built with JUCE_USE_SSE_INTRINSICS, SSE2 is reported as available and
    the input holds at least two complete groups of four floats, the groups are
    reduced four lanes at a time; every other case is handed to
    juce::findMaximum.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
   #if JUCE_USE_SSE_INTRINSICS
    // Number of complete four-float groups in the input.
    const int numLongOps = num / 4;

    if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
    {
        __m128 highest;

        // Seeds the accumulator from the first group, then folds in each
        // remaining group, advancing src past everything it consumes.
        #define JUCE_MAX_SSE_LOOP(loadOp) \
            highest = loadOp (src); \
            src += 4; \
            for (int groupsLeft = numLongOps - 1; groupsLeft > 0; --groupsLeft) \
            { \
                highest = _mm_max_ps (highest, loadOp (src)); \
                src += 4; \
            }

        // Aligned loads are only used when the helper says src allows them.
        if (FloatVectorHelpers::isAligned (src))
        {
            JUCE_MAX_SSE_LOOP (_mm_load_ps)
        }
        else
        {
            JUCE_MAX_SSE_LOOP (_mm_loadu_ps)
        }

        // Spill the four lanes so they can be reduced to a single float.
        float lanes[4];
        _mm_storeu_ps (lanes, highest);
        FloatVectorHelpers::mmEmpty();

        float result = jmax (lanes[0], lanes[1], lanes[2], lanes[3]);

        // Fold in the 0-3 values left after the last complete group
        // (src already points at them).
        num &= 3;

        for (int i = 0; i < num; ++i)
            result = jmax (result, src[i]);

        return result;
    }
   #endif

    return juce::findMaximum (src, num);
}[/code]

I’ll take a version of that if you D.R.Y it a bit first!

Yeh, it was just a quick copy and replace of the existing findMinMax function. This version basically combines the two, but it will be slightly slower as there are a couple of extra conditionals in there. The other option would be to do the whole thing as a macro and pass either _mm_min_ps or _mm_max_ps as an argument, as well as the jmin/jmax and the standard fallback function, but it all starts to get a bit difficult to read then.

In FloatVectorHelpers namespace:

[code] /** Returns either the minimum or the maximum of the num floats starting at src.

    @param src        pointer to the first float to examine
    @param num        number of floats to examine
    @param isMinimum  true to search for the minimum, false for the maximum

    When built with JUCE_USE_SSE_INTRINSICS, SSE2 is reported as available and
    the input holds at least two complete groups of four floats, the groups are
    reduced four lanes at a time; every other case is handed to
    juce::findMinimum or juce::findMaximum.
*/
static float findMinimumOrMaximum (const float* src, int num, bool isMinimum) noexcept
{
   #if JUCE_USE_SSE_INTRINSICS
    // Number of complete four-float groups in the input.
    const int numLongOps = num / 4;

    if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
    {
        __m128 best;

        // Seeds the accumulator from the first group, then folds in each
        // remaining group with minMaxOp, advancing src past what it consumes.
        #define JUCE_MINIMUMMAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
            best = loadOp (src); \
            src += 4; \
            for (int groupsLeft = numLongOps - 1; groupsLeft > 0; --groupsLeft) \
            { \
                best = minMaxOp (best, loadOp (src)); \
                src += 4; \
            }

        // Queried once, before src is advanced by the loop.
        const bool srcIsAligned = FloatVectorHelpers::isAligned (src);

        if (isMinimum)
        {
            if (srcIsAligned) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps,  _mm_min_ps) }
            else              { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_min_ps) }
        }
        else
        {
            if (srcIsAligned) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps,  _mm_max_ps) }
            else              { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_max_ps) }
        }

        // Spill the four lanes so they can be reduced to a single float.
        float lanes[4];
        _mm_storeu_ps (lanes, best);
        FloatVectorHelpers::mmEmpty();

        // The 0-3 values left after the last complete group
        // (src already points at them).
        num &= 3;

        float result;

        if (isMinimum)
        {
            result = jmin (lanes[0], lanes[1], lanes[2], lanes[3]);

            for (int i = 0; i < num; ++i)
                result = jmin (result, src[i]);
        }
        else
        {
            result = jmax (lanes[0], lanes[1], lanes[2], lanes[3]);

            for (int i = 0; i < num; ++i)
                result = jmax (result, src[i]);
        }

        return result;
    }
   #endif

    if (isMinimum)
        return juce::findMinimum (src, num);

    return juce::findMaximum (src, num);
}

[/code]

[code]/** Returns the minimum of the num floats starting at src, by delegating to
    FloatVectorHelpers::findMinimumOrMaximum in its minimum mode.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
    const bool searchForMinimum = true;
    return FloatVectorHelpers::findMinimumOrMaximum (src, num, searchForMinimum);
}

/** Returns the maximum of the num floats starting at src, by delegating to
    FloatVectorHelpers::findMinimumOrMaximum in its maximum mode.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
    const bool searchForMinimum = false;
    return FloatVectorHelpers::findMinimumOrMaximum (src, num, searchForMinimum);
}
[/code]

Here’s the macro version if you prefer it:

[code]// Reduces numLongOps groups of four floats into the __m128 accumulator `val`.
//
//   loadOp    - intrinsic used to load four floats from src
//               (e.g. _mm_load_ps or _mm_loadu_ps)
//   minMaxOp  - intrinsic combining the accumulator with each loaded group
//               (e.g. _mm_min_ps or _mm_max_ps)
//
// The expansion site must already have `val` (__m128), `src` (const float*)
// and `numLongOps` (int) in scope under exactly those names; src is advanced
// past every group consumed. Every line but the last needs its trailing
// backslash, otherwise the macro body ends early.
#define JUCE_MINIMUM_MAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
    val = loadOp (src); \
    src += 4; \
    for (int i = 1; i < numLongOps; ++i) \
    { \
        const __m128 s = loadOp (src); \
        val = minMaxOp (val, s); \
        src += 4; \
    }

// Expands to the SSE fast path of a find-minimum / find-maximum function.
//
//   src             - const float* to the first value; it is advanced by the
//                     expansion. JUCE_MINIMUM_MAXIMUM_SSE_LOOP refers to the
//                     pointer by the literal name `src`, so the argument
//                     passed here must be a variable with that name.
//   num             - modifiable int holding the number of values; it is
//                     overwritten with the leftover count (num & 3)
//   minMaxOp        - four-lane intrinsic (_mm_min_ps or _mm_max_ps)
//   minMaxFunction  - scalar counterpart (jmin or jmax), called with both two
//                     and four arguments
//
// When numLongOps > 1 and SSE2 is reported as available, the expansion
// RETURNS from the enclosing function with the result; otherwise control
// falls through to whatever follows the macro invocation. It also declares
// `numLongOps` in the enclosing scope. Every line but the last needs its
// trailing backslash, otherwise the macro body ends early.
#define FIND_MIN_OR_MAX(src, num, minMaxOp, minMaxFunction) \
    const int numLongOps = num / 4; \
    \
    if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available()) \
    { \
        __m128 val; \
        \
        if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUM_MAXIMUM_SSE_LOOP (_mm_load_ps, minMaxOp) } \
        else                                     { JUCE_MINIMUM_MAXIMUM_SSE_LOOP (_mm_loadu_ps, minMaxOp) } \
        \
        float localVal; \
        \
        { \
            float vals[4]; \
            _mm_storeu_ps (vals, val); \
            FloatVectorHelpers::mmEmpty(); \
            \
            localVal = minMaxFunction (vals[0], vals[1], vals[2], vals[3]); \
        } \
        \
        num &= 3; \
        \
        if (num != 0) \
        { \
            for (int i = 0; i < num; ++i) \
            { \
                const float s = src[i]; \
                localVal = minMaxFunction (localVal, s); \
            } \
        } \
        \
        return localVal; \
    } [/code]

[code]/** Returns the minimum of the num floats starting at src.

    FIND_MIN_OR_MAX returns from this function itself when it takes its SSE
    path; otherwise execution reaches the juce::findMinimum fallback below.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
    // The guard must be JUCE_USE_SSE_INTRINSICS, as in the other versions:
    // an undefined macro name evaluates to 0 in #if, which would silently
    // compile the SSE path out.
   #if JUCE_USE_SSE_INTRINSICS
    FIND_MIN_OR_MAX (src, num, _mm_min_ps, jmin)
   #endif

    return juce::findMinimum (src, num);
}

/** Returns the maximum of the num floats starting at src.

    FIND_MIN_OR_MAX returns from this function itself when it takes its SSE
    path; otherwise execution reaches the juce::findMaximum fallback below.
*/
float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
    // The guard must be JUCE_USE_SSE_INTRINSICS, as in the other versions:
    // an undefined macro name evaluates to 0 in #if, which would silently
    // compile the SSE path out.
   #if JUCE_USE_SSE_INTRINSICS
    FIND_MIN_OR_MAX (src, num, _mm_max_ps, jmax)
   #endif

    return juce::findMaximum (src, num);
}
[/code]

Ta! Hmm… I think I prefer the non-macro one. Will check in soon!