AcademySoftwareFoundation · lgritz · Feb 13, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -343,31 +343,6 @@ jobs:
                             OPENIMAGEIO_CMAKE_FLAGS="-DUSE_PYTHON=0"
                             CMAKE_BUILD_TYPE=RelWithDebInfo
 
-          - desc: icc/C++17 llvm14 py3.9 oiio-2.5 avx2
-            nametag: linux-icc
-            runner: ubuntu-latest
-            container: aswf/ci-osl:2023-clang15
-            cc_compiler: icc
-            cxx_compiler: icpc
-            cxx_std: 17
-            fmt_ver: 7.1.3
-            opencolorio_ver: v2.3.2
-            openimageio_ver: v2.5.17.0
-            # Changes to OIIO's simd.h starting in commit 68666db9 (from PR
-            # #4187) seem to trigger compiler bugs in icc and generate wrong
-            # SIMD code. It's probably not worth tracking down for just this
-            # obsolete compiler. Just lock down to OIIO 2.5 for icc builds to
-            # avoid the problem.
-            # openimageio_ver: e41ac03c0b21  # works
-            # openimageio_ver: 68666db994d5 # broken
-            python_ver: "3.10"
-            pybind11_ver: v2.10.0
-            # simd: avx2,f16c
-            batched: b8_AVX2_noFMA
-            setenvs: export OSL_CMAKE_FLAGS="-DSTOP_ON_WARNING=OFF -DEXTRA_CPP_ARGS=-fp-model=consistent"
-                            OPENIMAGEIO_CMAKE_FLAGS=-DBUILD_FMT_VERSION=7.1.3
-                            USE_OPENVDB=0
-                            OPENCOLORIO_CMAKE_FLAGS="-DCMAKE_CXX_COMPILER=g++"
           - desc: icx/C++17 llvm14 py3.10 oiio-3.0 avx2
             nametag: linux-icx
             runner: ubuntu-latest
@@ -380,7 +355,7 @@ jobs:
             cxx_std: 17
             fmt_ver: 7.1.3
             opencolorio_ver: v2.3.2
-            openimageio_ver: v3.0.11.0
+            openimageio_ver: v3.0.15.0
             python_ver: "3.10"
             pybind11_ver: v2.10.0
             simd: avx2,f16c

diff --git a/src/include/OSL/Imathx/Imathx.h b/src/include/OSL/Imathx/Imathx.h
@@ -248,9 +248,7 @@ affineInverse(const Matrix44 &m)
 // differently than the LLVM IR version.
 // NOTE:  only using "inline" to get ODR (One Definition Rule) behavior
 static inline OSL_HOSTDEVICE Matrix44
-#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     OSL_GNUC_ATTRIBUTE(optimize("fp-contract=off"))
-#endif
 nonAffineInverse(const Matrix44 &source);
 
 Matrix44 OSL_HOSTDEVICE nonAffineInverse(const Matrix44 &source)

diff --git a/src/include/OSL/mask.h b/src/include/OSL/mask.h
@@ -20,19 +20,6 @@ OSL_NAMESPACE_BEGIN
 using std::popcount;
 using std::countr_zero;
 
-#elif OSL_INTEL_CLASSIC_COMPILER_VERSION
-
-#include <immintrin.h>
-
-OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return _mm_popcnt_u32(x);}
-OSL_FORCEINLINE int popcount(uint64_t x) noexcept { return _mm_popcnt_u64(x); }
-OSL_FORCEINLINE int countr_zero(uint32_t x) noexcept { return _bit_scan_forward(x); }
-OSL_FORCEINLINE int countr_zero(uint64_t x) noexcept {
-    unsigned __int32 index;
-    _BitScanForward64(&index, x);
-    return static_cast<int>(index);
-}
-
 #elif defined(__GNUC__) || defined(__clang__)
 
 OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return __builtin_popcount(x); }

diff --git a/src/include/OSL/oslnoise.h b/src/include/OSL/oslnoise.h
@@ -647,7 +647,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> select(const bool b, const Dual2<flo
     // versus requiring a stack location.
     // Without this work per component, gathers & scatters were being emitted
     // when used inside SIMD loops.
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
     // Clang's vectorizor was really insistent that a select operation could not be replaced
     // with control flow, so had to re-introduce the ? operator to make it happy
     return Dual2<float> (
@@ -2254,7 +2254,6 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
 
     // With Dual2<Vec3> data types, a lot of code is generated below
     // which caused some runaway compiler memory consumption when vectorizing
-#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     auto l_result = OIIO::lerp (
                OIIO::trilerp (grad (hash (X  , Y  , Z  , W  ), fx     , fy     , fz     , fw     ),
                               grad (hash (X+1, Y  , Z  , W  ), fx-1.0f, fy     , fz     , fw     ),
@@ -2275,40 +2274,6 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
                               grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
                               u, v, t),
                s);
-#else
-    // Use a loop to avoid repeating code gen twice
-    Dual2<Vec3> v0, v1;
-    // GCC emits -Wmaybe-uninitialized errors for v0,v1.
-    // To avoid, GCC uses reference version above
-
-    // Clang doesn't want to vectorize with the vIndex loop
-    // To enable vectorization, Clang uses reference version above
-    OSL_INTEL_PRAGMA(nounroll_and_jam)
-    for(int vIndex=0; vIndex < 2;++vIndex) {
-        int vW = W + vIndex;
-        Dual2<float> vfw = fw - float(vIndex);
-
-        Dual2<Vec3> vResult = OIIO::trilerp (
-            grad (hash (X  , Y  , Z  , vW  ), fx     , fy     , fz     , vfw     ),
-            grad (hash (X+1, Y  , Z  , vW  ), fx-1.0f, fy     , fz     , vfw     ),
-            grad (hash (X  , Y+1, Z  , vW  ), fx     , fy-1.0f, fz     , vfw     ),
-            grad (hash (X+1, Y+1, Z  , vW  ), fx-1.0f, fy-1.0f, fz     , vfw     ),
-            grad (hash (X  , Y  , Z+1, vW  ), fx     , fy     , fz-1.0f, vfw     ),
-            grad (hash (X+1, Y  , Z+1, vW  ), fx-1.0f, fy     , fz-1.0f, vfw     ),
-            grad (hash (X  , Y+1, Z+1, vW  ), fx     , fy-1.0f, fz-1.0f, vfw     ),
-            grad (hash (X+1, Y+1, Z+1, vW  ), fx-1.0f, fy-1.0f, fz-1.0f, vfw     ),
-            u, v, t);
-        // Rather than dynamic indexing array,
-        // use masking to store outputs,
-        // to better enable SROA (Scalar Replacement of Aggregates) optimizations
-        if (vIndex == 0) {
-            v0 = vResult;
-        } else {
-            v1 = vResult;
-        }
-    }
-    auto l_result = OIIO::lerp (v0, v1, s);
-#endif
 
     result = scale4 (l_result);
     }

diff --git a/src/include/OSL/sfmath.h b/src/include/OSL/sfmath.h
@@ -68,23 +68,7 @@ namespace sfm
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     */
 
-#if OSL_INTEL_CLASSIC_COMPILER_VERSION
-    // std::isinf wasn't vectorizing and was branchy. This slightly
-    // perturbed version fairs better and is branch free when vectorized
-    // with the Intel compiler.
-    OSL_FORCEINLINE OSL_HOSTDEVICE int isinf (float x) {
-        int r = 0;
-        // NOTE: using bitwise | to avoid branches
-        if (!(std::isfinite(x)|std::isnan(x))) {
-            r = static_cast<int>(copysignf(1.0f,x));
-        }
-        return r;
-    }
-#else
-    // Other compilers don't seem to vectorize well no matter what, so just
-    // use the standard version.
     using std::isinf;
-#endif
 
     template<typename T>
     OSL_FORCEINLINE OSL_HOSTDEVICE T
@@ -191,7 +175,7 @@ namespace sfm
         }
     }
 
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
 
     // To make clang's loop vectorizor happy
     // we need to make sure result of min and max

diff --git a/src/include/OSL/wide.h b/src/include/OSL/wide.h
@@ -1904,7 +1904,7 @@ struct WideImpl<const Dual2<ElementT>[], WidthT, true /*IsConstT */> {
 }  // namespace pvt
 
 
-#if OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_GNUC_VERSION
+#if OSL_GNUC_VERSION
 // Workaround for error #3466: inheriting constructors must be inherited from a direct base class
 #    define __OSL_INHERIT_BASE_CTORS(DERIVED, BASE) \
         using Base = typename DERIVED::BASE;        \
@@ -3210,8 +3210,7 @@ template<typename DataT, int WidthT>
 OSL_FORCEINLINE bool
 testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
 {
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
-    && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
     int anyLaneIsOn = 0;
     OSL_OMP_PRAGMA(omp simd simdlen(WidthT) reduction(max : anyLaneIsOn))
     for (int i = 0; i < WidthT; ++i) {

diff --git a/src/liboslexec/opcolor_impl.h b/src/liboslexec/opcolor_impl.h
@@ -266,8 +266,7 @@ hsv_to_rgb(const COLOR3& hsv)
         // Avoid switch statement vectorizor doesn't like
         // Also avoid if/else nest which some optimizers might
         // convert back into a switch statement
-#    if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
-        && !OSL_INTEL_LLVM_COMPILER_VERSION
+#    if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
         // Clang was still transforming series of if's back into a switch.
         // Alternate between == and <= comparisons to avoid
 #        define __OSL_ASC_EQ <=

diff --git a/src/liboslexec/wide/wide_opcolor.cpp b/src/liboslexec/wide/wide_opcolor.cpp
@@ -302,7 +302,7 @@ namespace {
 
 // Note: Clang 14 seems to no longer allow vectorizing these loops
 #if ((OSL_CLANG_VERSION && OSL_CLANG_VERSION < 140000) \
-     || OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_INTEL_LLVM_COMPILER_VERSION)
+     || OSL_INTEL_LLVM_COMPILER_VERSION)
 #    define WIDE_TRANSFORMC_OMP_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
 #else
 #    define WIDE_TRANSFORMC_OMP_SIMD_LOOP(...)

diff --git a/src/liboslexec/wide/wide_opnoise_periodic_perlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_periodic_perlin_deriv_Vec3.cpp
@@ -24,17 +24,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WV> {
 };
 template<>
 struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF, Param::WV, Param::WF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 

diff --git a/src/liboslexec/wide/wide_opnoise_periodic_uperlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_periodic_uperlin_deriv_Vec3.cpp
@@ -24,17 +24,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WV> {
 };
 template<>
 struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF, Param::WV, Param::WF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 

diff --git a/src/liboslexec/wide/wide_opnoise_perlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_perlin_deriv_Vec3.cpp
@@ -22,17 +22,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV> {
     static constexpr int simd_threshold = 5;
 };
 template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 

diff --git a/src/liboslexec/wide/wide_opnoise_uperlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_uperlin_deriv_Vec3.cpp
@@ -22,17 +22,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV> {
     static constexpr int simd_threshold = 5;
 };
 template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 

diff --git a/src/liboslexec/wide/wide_opstring.cpp b/src/liboslexec/wide/wide_opstring.cpp
@@ -75,7 +75,7 @@ __OSL_MASKED_OP2(strlen, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
 
     OSL_FORCEINLINE_BLOCK
     {
-#if (!OSL_CLANG_VERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION)
+#if !OSL_CLANG_VERSION
         // Clang 11 generated SIMD crashes at runtime
         // TODO: investigate clang crash when vectorizing
         OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))

diff --git a/src/liboslnoise/sfm_gabornoise.h b/src/liboslnoise/sfm_gabornoise.h
@@ -356,19 +356,7 @@ gabor_cell(const sfm::GaborUniformParams& gup, const sfm::GaborParams& gp,
                 Dual2<float> gk     = gabor_kernel(w_i_t_s_f, omega_i_t_s_f,
                                                    phi_i_t_s_f, a_i_t_s_f,
                                                    x_k_i_t);  // 2D
-#if defined(__AVX512F__) && defined(__INTEL_COMPILER) \
-    && (__INTEL_COMPILER < 1800)
-                // icc17 with AVX512 had some incorrect results
-                // due to the not_finite code path executing even
-                // when the value was finite.  Workaround: using isnan | isinf
-                // instead of isfinite avoided the issue.
-                // icc18u3 doesn't exhibit the problem
-                // NOTE: tried using bitwise | to avoid branches and got internal compiler error
-                //bool not_finite = std::isnan(gk.val()) | std::isinf(gk.val());
-                bool not_finite = std::isnan(gk.val()) || std::isinf(gk.val());
-#else
-                bool not_finite = !std::isfinite(gk.val());
-#endif
+                bool not_finite     = !std::isfinite(gk.val());
                 if (OSL_UNLIKELY(not_finite)) {
                     // Numeric failure of the filtered version.  Fall
                     // back on the unfiltered.

diff --git a/src/testshade/batched_simplerend.cpp b/src/testshade/batched_simplerend.cpp
@@ -180,17 +180,7 @@ BatchedSimpleRenderer<WidthT>::get_matrix(BatchedShaderGlobals* bsg,
 
         OSL_OMP_PRAGMA(omp simd simdlen(WidthT))
         for (int lane = 0; lane < WidthT; ++lane) {
-#    if __INTEL_COMPILER >= 1900
-            // Used load + blend + store instead of masked store to temporarily work around
-            // an icc19u5 issue when automatic ISA dispatch is used causing scatters to be generated
-            Matrix44 m = result[lane];
-            if (result.mask()[lane]) {
-                m = uniformTransform;
-            }
-            result[ActiveLane(lane)] = m;
-#    else
             result[lane] = uniformTransform;
-#    endif
         }
 #endif
     }

diff --git a/testsuite/blackbody/ref/out.icc.exr b/testsuite/blackbody/ref/out.alt.exr
rename from testsuite/blackbody/ref/out.icc.exr
rename to testsuite/blackbody/ref/out.alt.exr