Files
openmm/tests/TestVectorizeGeneric.h
Evan Pretti 05472c9a81 Update file headers (#5074)
* Replace SimTK-containing file headers

* Update file headers for new Tinker reader files added
2025-09-23 10:27:26 -07:00

459 lines
18 KiB
C++

/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit. *
* See https://openmm.org/development. *
* *
* Portions copyright (c) 2014-2020 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#pragma once
/**
* This tests all sizes of vectorized operations using templated test code.
*/
#include <array>
#include <cstring>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <memory.h>
#include <sstream>
#include <typeinfo>
/**
* Return the 32-bit integer bit pattern from the given floating-point value.
*/
static int32_t floatAsIntBits(float f) {
int32_t i;
memcpy(&i, &f, 4);
return i;
}
/**
* Compare two floating-point values using units-in-last-place (ULP) as a measure of equality. Two values
* which are only a few representable values apart can be considered to be equal. Note that IEEE
* operations (add, mul, etc.) will always be exact, but sequences of operations might be more than
* a few ULP apart, but still close enough to be considered equal. ULP comparisons work at any scale of
* number, unlike an epsilon-based approach.
*/
static bool almostEqual(float a, float b) {
// Maybe they really are equal.
if (a == b)
return true;
// Infinities and NANs are never equal to anything, even other nans and infinities.
if (std::isnan(a) || std::isinf(a) ||
std::isnan(b) || std::isinf(b))
return false;
// If they are different signs then they can't be equal. For two very small denormal values they might
// be very close to each other but either side of 0, but denormals are a corner case which don't deserve
// to be equal.
if (std::signbit(a) != std::signbit(b))
return false;
// The two numbers must be valid values with the same sign, so treat then as basic integers to
// get at their ULP values. If they are only a few ULP apart, then they are essentially equal.
int32_t intDiff = std::abs(floatAsIntBits(a) - floatAsIntBits(b));
return intDiff < 4;
}
static bool exactlyEqual(float a, float b) { return a == b; }
/**
* Write the contents of the given array-like object to a stream. No formatting is applied.
*/
template<typename FVEC>
void VecToStream(std::ostream& stream, const FVEC& vec)
{
constexpr int numElements = sizeof(FVEC) / sizeof(float);
const float* vptr = (const float*)&vec;
for (int i=0; i<numElements; ++i)
stream << vptr[i] << ", ";
}
/**
* Given two vector-like objects compared each of their elements for equality. The vector objects can be
* anything which in memory is a list of 32-bit floating-point values, so SIMD vectors, C arrays or
* C++ arrays would all be valid.
*/
template<typename S, typename T>
static void checkElementsEqual(const S& computed, const T& expected,
std::function<bool(float, float)> equal_fn,
const char* file, int line) {
// Both S and T should be arrays of floats of the same length.
static_assert(sizeof(T) == sizeof(S), "Array-like elements must have the same size");
constexpr int numElements = sizeof(S) / sizeof(float);
const float* computedPtr = (const float*)&computed;
const float* expectedPtr = (const float*)&expected;
std::ostringstream details;
details << "Error during test for type " << typeid(S).name() << '\n';
bool passed = true;
for (int i=0; i<numElements; ++i)
{
if (!equal_fn(computedPtr[i], expectedPtr[i]))
passed = false;
}
if (!passed)
{
details << "Values differ. ";
VecToStream(details, computed);
details << " and ";
VecToStream(details, expected);
OpenMM::throwException(file, line, details.str());
}
}
#define ASSERT_VEC_EQUAL(computed, expected) {checkElementsEqual(computed, expected, exactlyEqual, __FILE__, __LINE__);}
#define ASSERT_VEC_ALMOST_EQUAL(computed, expected) {checkElementsEqual(computed, expected, almostEqual, __FILE__, __LINE__);}
static float getRandomFloat () {
// Between -50 and 50.
return float(rand()) / float(RAND_MAX/100.0f) - 50.0f;
}
/**
* Given an array-like memory object containing floats, apply the given function to every element.
*/
template<typename FVEC>
FVEC applyUnaryFn(const FVEC& v, std::function<float(float)> fn) {
constexpr int numElements = sizeof(FVEC) / sizeof(float);
FVEC result;
float* rp = (float*)&result;
const float* vp = (const float*)&v;
for (int i=0; i<numElements; ++i)
rp[i] = fn(vp[i]);
return result;
}
/**
* Given an array-like memory object containing floats, apply the given function to every element.
*/
template<typename FVEC>
FVEC applyBinaryFn(const FVEC& a, const FVEC& b, std::function<float(float, float)> fn) {
constexpr int numElements = sizeof(FVEC) / sizeof(float);
FVEC result;
float* rp = (float*)&result;
const float* ap = (const float*)&a;
const float* bp = (const float*)&b;
for (int i=0; i<numElements; ++i)
rp[i] = fn(ap[i], bp[i]);
return result;
}
/**
* Provide a test fixture class which underpins all verification for a given
* type of vector SIMD implementation, as well as providing common utility functions
*/
template<typename FVEC>
class TestFvec {
public:
static constexpr int numElements = sizeof(FVEC) / sizeof(float);
void testInitializers() const;
void testUnaryOps() const;
void testBinaryOps() const;
void testUtilities() const;
void testBlendAndCompare() const;
void testTranspose() const;
static void testAll() {
TestFvec<FVEC> testUnit;
testUnit.testInitializers();
testUnit.testUnaryOps();
testUnit.testBinaryOps();
testUnit.testUtilities();
testUnit.testBlendAndCompare();
testUnit.testTranspose();
}
FVEC getRandomFvec() const {
union {
FVEC v;
float f[numElements];
};
for (auto& e : f)
e = getRandomFloat();
return v;
}
};
template<typename FVEC>
void TestFvec<FVEC>::testInitializers() const {
FVEC computedZero = {};
float expectedZero[numElements] = {};
ASSERT_VEC_EQUAL(computedZero, expectedZero);
FVEC computedBroadcast(14.5f);
float expectedBroadcast[numElements];
std::fill_n(expectedBroadcast, numElements, 14.5f);
ASSERT_VEC_EQUAL(computedBroadcast, expectedBroadcast);
float expectedArray[numElements];
std::iota(expectedArray, expectedArray + numElements, 23);
FVEC computedFromLoad(expectedArray);
ASSERT_VEC_EQUAL(computedFromLoad, expectedArray);
// Gather values from a table. Variants for both one vector and two vector gathers are provided.
// The indexes to gather (multiples of 7) are also generated, along with the expected answers.
float gatherTable[2048];
for (int i=0; i<2048;++i)
gatherTable[i] = -i; // Same index to make it easy to debug, but negative to avoid copying idx.
int gatherIndexes[numElements];
float gatherIndexesAsFloat[numElements]; // Same as above, but in float format.
float expectedGather0[numElements];
float expectedGather1[numElements];
for (int i=0; i<numElements; ++i)
{
gatherIndexes[i] = i * 7;
gatherIndexesAsFloat[i] = float(gatherIndexes[i]);
expectedGather0[i] = -(i * 7);
expectedGather1[i] = -(i * 7) - 1; // Each value is one less than previous.
}
// Single value gather
FVEC computedFromGather(gatherTable, gatherIndexes);
ASSERT_VEC_EQUAL(computedFromGather, expectedGather0);
// Pair-wise vector gather. The first values should be the same as a normal gather, and the
// second are just increments from the first. Note that there musty be some suitable conversion
// from a floating-point index (i.e., an integer value in float format), and the type required
// for the second operand of gatherVecPair. gatherVecPair can then take either an actual
// float vector, or some suitable format like ivec4 or ivec8.
FVEC findex(gatherIndexesAsFloat);
FVEC p0, p1;
gatherVecPair(gatherTable, findex, p0, p1);
ASSERT_VEC_EQUAL(p0, expectedGather0);
ASSERT_VEC_EQUAL(p1, expectedGather1);
}
template<typename FVEC>
void TestFvec<FVEC>::testUnaryOps() const {
const auto v = getRandomFvec();
// Note that these are exact comparisons because all these SIMD operators are
// just applying the scalar operator, so there should be no loss of precision.
ASSERT_VEC_EQUAL(abs(v), applyUnaryFn(v, [](float x) { return std::abs(x);} ));
ASSERT_VEC_EQUAL(-v, applyUnaryFn(v, [](float x) { return 0 - x;} ));
ASSERT_VEC_EQUAL(floor(v), applyUnaryFn(v, [](float x) { return std::floor(x);} ));
ASSERT_VEC_EQUAL(ceil(v), applyUnaryFn(v, [](float x) { return std::ceil(x);} ));
ASSERT_VEC_EQUAL(round(v), applyUnaryFn(v, [](float x) { return std::round(x);} ));
// Borrow a few other functions to test sqrt neatly.
const auto positiveValue = abs(v) + 1;
ASSERT_VEC_ALMOST_EQUAL(sqrt(positiveValue * positiveValue), positiveValue);
ASSERT_VEC_ALMOST_EQUAL(rsqrt(positiveValue * positiveValue), 1.0f / abs(positiveValue));
}
template<typename FVEC>
void TestFvec<FVEC>::testBinaryOps() const {
const auto v0 = getRandomFvec();
const auto v1 = getRandomFvec();
// Note that most of these are exact comparisons because all these SIMD operators are
// just applying the scalar operator, so there should be no loss of precision. The one
// exception is division, which does often do something slightly different
// since division is an expensive operation (e.g., multiply by reciprocal).
// Binary operators.
ASSERT_VEC_EQUAL(v0 + v1, applyBinaryFn(v0, v1, std::plus<float>()));
ASSERT_VEC_EQUAL(v0 - v1, applyBinaryFn(v0, v1, std::minus<float>()));
ASSERT_VEC_EQUAL(v0 * v1, applyBinaryFn(v0, v1, std::multiplies<float>()));
ASSERT_VEC_ALMOST_EQUAL(v0 / v1, applyBinaryFn(v0, v1, std::divides<float>()));
// Assignment operators.
auto addAssign = v0;
addAssign += v1;
ASSERT_VEC_EQUAL(addAssign, applyBinaryFn(v0, v1, std::plus<float>()));
auto subAssign = v0;
subAssign -= v1;
ASSERT_VEC_EQUAL(subAssign, applyBinaryFn(v0, v1, std::minus<float>()));
auto mulAssign = v0;
mulAssign *= v1;
ASSERT_VEC_EQUAL(mulAssign, applyBinaryFn(v0, v1, std::multiplies<float>()));
auto divAssign = v0;
divAssign /= v1;
ASSERT_VEC_ALMOST_EQUAL(divAssign, applyBinaryFn(v0, v1, std::divides<float>()));
// Binary ops between SIMD and scalar.
const float f = getRandomFloat();
const FVEC fdup(f);
ASSERT_VEC_EQUAL(v0 + f, applyBinaryFn(v0, fdup, std::plus<float>()));
ASSERT_VEC_EQUAL(f + v0, applyBinaryFn(fdup, v0, std::plus<float>()));
ASSERT_VEC_EQUAL(v0 - f, applyBinaryFn(v0, fdup, std::minus<float>()));
ASSERT_VEC_EQUAL(f - v0, applyBinaryFn(fdup, v0, std::minus<float>()));
ASSERT_VEC_EQUAL(v0 * f, applyBinaryFn(v0, fdup, std::multiplies<float>()));
ASSERT_VEC_EQUAL(f * v0, applyBinaryFn(fdup, v0, std::multiplies<float>()));
ASSERT_VEC_ALMOST_EQUAL(v0 / f, applyBinaryFn(v0, fdup, std::divides<float>()));
ASSERT_VEC_ALMOST_EQUAL(f / v0, applyBinaryFn(fdup, v0, std::divides<float>()));
// Binary functions.
ASSERT_VEC_EQUAL(min(v0, v1),
applyBinaryFn(v0, v1, [](float x, float y) { return std::min<float>(x, y); }));
ASSERT_VEC_EQUAL(max(v0, v1),
applyBinaryFn(v0, v1, [](float x, float y) { return std::max<float>(x, y); }));
}
template<typename FVEC>
void TestFvec<FVEC>::testTranspose() const {
// A table of random data to transpose.
float table[numElements * 4];
for (auto& e : table) e = std::round(getRandomFloat());
// Load the table row data into vectors.
const auto i0 = FVEC(table + 0 * numElements);
const auto i1 = FVEC(table + 1 * numElements);
const auto i2 = FVEC(table + 2 * numElements);
const auto i3 = FVEC(table + 3 * numElements);
// Manually transpose the data.
std::array<float, numElements * 4> expectedTranspose;
for (auto r=0; r<4; ++r)
{
for (auto c=0; c<numElements; ++c)
{
expectedTranspose[c * 4 + r] = table[r * numElements + c];
}
}
fvec4 computedTranspose[numElements];
transpose(i0, i1, i2, i3, computedTranspose);
ASSERT_VEC_EQUAL(computedTranspose, expectedTranspose);
FVEC o0, o1, o2, o3;
transpose(computedTranspose, o0, o1, o2, o3);
ASSERT_VEC_EQUAL(i0, o0);
ASSERT_VEC_EQUAL(i1, o1);
ASSERT_VEC_EQUAL(i2, o2);
ASSERT_VEC_EQUAL(i3, o3);
}
template<typename FVEC>
void TestFvec<FVEC>::testBlendAndCompare() const {
const FVEC zero = {};
const FVEC allOne(1.0f);
const FVEC allTwo(2.0f);
// Note that different targets use different types of mask, so rather than checking
// the mask directly, instead check the output of using the mask as a blend to provide
// an indirect test.
const auto maskNone = FVEC::expandBitsToMask(0);
ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskNone), allOne);
ASSERT_VEC_EQUAL(blendZero(allOne, maskNone), zero);
const auto maskAll = FVEC::expandBitsToMask(-1);
ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskAll), allTwo);
ASSERT_VEC_EQUAL(blendZero(allOne, maskAll), allOne);
// Repeating pattern big enough to do most SIMD lengths.
const int bitmask = 0b1100001101101001;
const auto maskSome = FVEC::expandBitsToMask(bitmask);
float expectedMaskSome[numElements];
float expectedZeroMaskSome[numElements];
for (int i=0; i<numElements; ++i)
{
expectedMaskSome[i] = (bitmask & (1 << i)) ? 2.0f : 1.0f;
expectedZeroMaskSome[i] = (bitmask & (1 << i)) ? 2.0f : 0.0f;
}
ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskSome), expectedMaskSome);
ASSERT_VEC_EQUAL(blendZero(allTwo, maskSome), expectedZeroMaskSome);
// Test comparisons too, using random numbers, and then blending in either 0 or 1.
const auto v0 = getRandomFvec();
const auto v1 = getRandomFvec();
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 < v1),
applyBinaryFn(v0, v1, [](float x, float y) { return x < y ? 2.0f : 1.0f; }));
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 <= v1),
applyBinaryFn(v0, v1, [](float x, float y) { return x <= y ? 2.0f : 1.0f; }));
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 <= v0), allTwo);
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 > v1),
applyBinaryFn(v0, v1, [](float x, float y) { return x > y ? 2.0f : 1.0f; }));
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 >= v1),
applyBinaryFn(v0, v1, [](float x, float y) { return x >= y ? 2.0f : 1.0f; }));
ASSERT_VEC_EQUAL(blend(allOne, allTwo, v0 >= v0), allTwo);
}
template<typename FVEC>
void TestFvec<FVEC>::testUtilities() const {
/** Use rounded (i.e., integer) values for the reductions. Reduction operations are very sensitive
* to ordering. The correct result is found by sorting values into ascending order to ensure that
* similar sized numbers are accumulated earlier than less similar numbers. If completely random
* numbers were used, this effect would show up here, making it more a test of what random numbers
* you got, than of the code itself. By rounding to integers, the numbers will behave sanely for the
* reduction, meaning it is a test of the reduction, and not of the format.
*/
const auto v0 = round(getRandomFvec());
const auto v1 = round(getRandomFvec());
const auto v2 = round(getRandomFvec());
const float* v0p = (const float*)&v0;
const float* v1p = (const float*)&v1;
const float* v2p = (const float*)&v2;
const auto expectedRedAddV0 = std::accumulate(v0p, v0p + numElements, 0.0f);
const auto expectedRedAddV1 = std::accumulate(v1p, v1p + numElements, 0.0f);
const auto expectedRedAddV2 = std::accumulate(v2p, v2p + numElements, 0.0f);
ASSERT_VEC_EQUAL(reduceAdd(v0), expectedRedAddV0);
// Reduction of three vectors by addition into a single 3-element vector. Note that the final element
// of the reduction is undefined, so the expected value copies over whatever that undefined value is.
const auto computedRed3 = reduceToVec3(v0, v1, v2);
const auto expectedRed3 = fvec4(expectedRedAddV0, expectedRedAddV1, expectedRedAddV2, computedRed3[3]);
ASSERT_VEC_EQUAL(computedRed3, expectedRed3);
}