FloatVectorOperations requests findMinimum and findMaximum


#1

Hi Jules, do you think you could add the following two methods to FloatVectorOperations? They maintain completeness with those in MathsFunctions.h and are useful for tasks like metering. Cheers.

[code] /** Finds the miniumum value in the given array. /
static float JUCE_CALLTYPE findMinimum (const float
src, int numValues) noexcept;

/** Finds the maximum value in the given array. */
static float JUCE_CALLTYPE findMaximum (const float* src, int numValues) noexcept;

[/code]

[code]float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
    // Returns the smallest of the num floats starting at src. When SSE intrinsics are
    // compiled in and SSE2 is reported as available, inputs of eight or more values are
    // reduced four lanes at a time; everything else is handed to juce::findMinimum.
   #if JUCE_USE_SSE_INTRINSICS
    const int numVectors = num / 4;

    if (numVectors > 1 && FloatVectorHelpers::isSSE2Available())
    {
        // One-past-the-end of the part of the input that is consumed in groups of four.
        const float* const vectorEnd = src + numVectors * 4;
        __m128 lowest;

        // Seeds the accumulator with the first group, then folds in every later group.
        #define JUCE_MIN_SSE_LOOP(loadOp) \
            lowest = loadOp (src); \
            for (src += 4; src != vectorEnd; src += 4) \
                lowest = _mm_min_ps (lowest, loadOp (src));

        // The aligned load is only chosen when the helper accepts src.
        if (FloatVectorHelpers::isAligned (src)) { JUCE_MIN_SSE_LOOP (_mm_load_ps) }
        else                                     { JUCE_MIN_SSE_LOOP (_mm_loadu_ps) }

        // Spill the four per-lane minima and collapse them into one value.
        float lanes[4];
        _mm_storeu_ps (lanes, lowest);
        FloatVectorHelpers::mmEmpty();

        float result = jmin (lanes[0], lanes[1], lanes[2], lanes[3]);

        // src now points at the 0-3 trailing values that did not fill a whole group.
        for (const float* const tailEnd = src + (num & 3); src != tailEnd; ++src)
            result = jmin (result, *src);

        return result;
    }
   #endif

    return juce::findMinimum (src, num);
}

float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
    // Returns the largest of the num floats starting at src. When SSE intrinsics are
    // compiled in and SSE2 is reported as available, inputs of eight or more values are
    // reduced four lanes at a time; everything else is handed to juce::findMaximum.
   #if JUCE_USE_SSE_INTRINSICS
    const int numVectors = num / 4;

    if (numVectors > 1 && FloatVectorHelpers::isSSE2Available())
    {
        // One-past-the-end of the part of the input that is consumed in groups of four.
        const float* const vectorEnd = src + numVectors * 4;
        __m128 highest;

        // Seeds the accumulator with the first group, then folds in every later group.
        #define JUCE_MAX_SSE_LOOP(loadOp) \
            highest = loadOp (src); \
            for (src += 4; src != vectorEnd; src += 4) \
                highest = _mm_max_ps (highest, loadOp (src));

        // The aligned load is only chosen when the helper accepts src.
        if (FloatVectorHelpers::isAligned (src)) { JUCE_MAX_SSE_LOOP (_mm_load_ps) }
        else                                     { JUCE_MAX_SSE_LOOP (_mm_loadu_ps) }

        // Spill the four per-lane maxima and collapse them into one value.
        float lanes[4];
        _mm_storeu_ps (lanes, highest);
        FloatVectorHelpers::mmEmpty();

        float result = jmax (lanes[0], lanes[1], lanes[2], lanes[3]);

        // src now points at the 0-3 trailing values that did not fill a whole group.
        for (const float* const tailEnd = src + (num & 3); src != tailEnd; ++src)
            result = jmax (result, *src);

        return result;
    }
   #endif

    return juce::findMaximum (src, num);
}[/code]


#2

I’ll take a version of that if you D.R.Y it a bit first!


#3

Yeah, it was just a quick copy and replace of the existing findMinMax function. This version basically combines the two, but it will be slightly slower as there’s a couple of extra conditionals in there. The other option would be to do the whole thing as a macro and pass either _mm_min_ps or _mm_max_ps as an argument, as well as jmin/jmax and the standard fallback function, but it all starts to get a bit difficult to read then.

In FloatVectorHelpers namespace:

[code] static float findMinimumOrMaximum (const float* src, int num, bool isMinimum) noexcept
{
    // Shared implementation for the minimum and maximum searches: isMinimum selects
    // which of the two is computed over the num floats starting at src.
   #if JUCE_USE_SSE_INTRINSICS
    const int numVectors = num / 4;

    // The SSE path needs at least two whole groups of four, plus SSE2 support.
    if (numVectors > 1 && FloatVectorHelpers::isSSE2Available())
    {
        const float* const vectorEnd = src + numVectors * 4;
        const bool aligned = FloatVectorHelpers::isAligned (src);
        __m128 acc;

        // Seeds the accumulator with the first group, then folds in every later group.
        #define JUCE_MINIMUMMAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
            acc = loadOp (src); \
            for (src += 4; src != vectorEnd; src += 4) \
                acc = minMaxOp (acc, loadOp (src));

        // One expansion per (load kind, reduction) pair so the inner loop stays branch-free.
        if (isMinimum)
        {
            if (aligned) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps,  _mm_min_ps) }
            else         { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_min_ps) }
        }
        else
        {
            if (aligned) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps,  _mm_max_ps) }
            else         { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_max_ps) }
        }

        // Spill the four per-lane results and collapse them into one value.
        float lanes[4];
        _mm_storeu_ps (lanes, acc);
        FloatVectorHelpers::mmEmpty();

        float result;

        if (isMinimum)
            result = jmin (lanes[0], lanes[1], lanes[2], lanes[3]);
        else
            result = jmax (lanes[0], lanes[1], lanes[2], lanes[3]);

        // src now points at the 0-3 trailing values that did not fill a whole group.
        for (const float* const tailEnd = src + (num & 3); src != tailEnd; ++src)
        {
            if (isMinimum)
                result = jmin (result, *src);
            else
                result = jmax (result, *src);
        }

        return result;
    }
   #endif

    // Generic fallback for short inputs or when the SSE path is not usable.
    if (isMinimum)
        return juce::findMinimum (src, num);

    return juce::findMaximum (src, num);
}

[/code]

[code]float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
    // Forwards to the shared helper, asking it for the minimum.
    const bool wantMinimum = true;

    return FloatVectorHelpers::findMinimumOrMaximum (src, num, wantMinimum);
}

float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
    // Forwards to the shared helper, asking it for the maximum.
    const bool wantMinimum = false;

    return FloatVectorHelpers::findMinimumOrMaximum (src, num, wantMinimum);
}
[/code]


#4

Here’s the macro version if you prefer it:

[code]/* Expects `__m128 val`, a `const float* src` and an `int numLongOps` to be in scope where
   it is expanded. Loads the first group of four floats into val with loadOp, then combines
   each of the remaining (numLongOps - 1) groups into val with minMaxOp, advancing src past
   every group it reads. Every line except the last needs its trailing backslash, otherwise
   the macro body ends at the first line break. */
#define JUCE_MINIMUM_MAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
    val = loadOp (src); \
    src += 4; \
    for (int i = 1; i < numLongOps; ++i) \
    { \
        const __m128 s = loadOp (src); \
        val = minMaxOp (val, s); \
        src += 4; \
    }

/* Expands to the SSE fast path of a min/max search, for use inside a function body.
   src and num must be modifiable variables (both are advanced/overwritten here);
   minMaxOp is the packed intrinsic (_mm_min_ps or _mm_max_ps) and minMaxFunction the
   matching scalar function (jmin or jmax). When the fast path applies, the expansion
   RETURNS from the enclosing function; otherwise control falls through to whatever
   follows the macro. Every line except the last needs its trailing backslash. */
#define FIND_MIN_OR_MAX(src, num, minMaxOp, minMaxFunction) \
    const int numLongOps = num / 4; \
    \
    if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available()) \
    { \
        __m128 val; \
        \
        if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUM_MAXIMUM_SSE_LOOP (_mm_load_ps, minMaxOp) } \
        else                                     { JUCE_MINIMUM_MAXIMUM_SSE_LOOP (_mm_loadu_ps, minMaxOp) } \
        \
        float localVal; \
        \
        { \
            float vals[4]; \
            _mm_storeu_ps (vals, val); \
            FloatVectorHelpers::mmEmpty(); \
            \
            localVal = minMaxFunction (vals[0], vals[1], vals[2], vals[3]); \
        } \
        \
        num &= 3; \
        \
        if (num != 0) \
        { \
            for (int i = 0; i < num; ++i) \
            { \
                const float s = src[i]; \
                localVal = minMaxFunction (localVal, s); \
            } \
        } \
        \
        return localVal; \
    } [/code]

[code]float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
{
    // SSE fast path: FIND_MIN_OR_MAX returns from this function itself when it handles
    // the input. The guard is JUCE_USE_SSE_INTRINSICS, the same flag the other versions
    // in this thread test, because the expansion uses __m128 and the _mm_* intrinsics.
   #if JUCE_USE_SSE_INTRINSICS
    FIND_MIN_OR_MAX (src, num, _mm_min_ps, jmin)
   #endif

    // Reached when the SSE path is compiled out or did not take the input.
    return juce::findMinimum (src, num);
}

float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
{
    // SSE fast path: FIND_MIN_OR_MAX returns from this function itself when it handles
    // the input. The guard is JUCE_USE_SSE_INTRINSICS, the same flag the other versions
    // in this thread test, because the expansion uses __m128 and the _mm_* intrinsics.
   #if JUCE_USE_SSE_INTRINSICS
    FIND_MIN_OR_MAX (src, num, _mm_max_ps, jmax)
   #endif

    // Reached when the SSE path is compiled out or did not take the input.
    return juce::findMaximum (src, num);
}
[/code]


#5

Ta! Hmm… I think I prefer the non-macro one. Will check in soon!