From 133a5a5a8cb1b7a4958edd4a5bc8a6d7b6c6649b Mon Sep 17 00:00:00 2001
From: barracuda156 <vital.had@gmail.com>
Date: Mon, 17 Jul 2023 09:15:17 +0800
Subject: [PATCH 2/2] Add optimization opts for PowerPC

---
 CMakeLists.txt     | 132 +++++++++++++++++++++++++++++----------------
 src/CMakeLists.txt |  30 ++++++-----
 2 files changed, 103 insertions(+), 59 deletions(-)

diff --git CMakeLists.txt CMakeLists.txt
index ecbd37e..92f6e15 100644
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -2,7 +2,12 @@
 #          flags manually on cmd line
 #       2/ Should we standardise on just AVX?  As machine we run on
 #          may be different to machine we build on
-set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version")
+
+if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64")
+    set(CMAKE_OSX_DEPLOYMENT_TARGET "10.5" CACHE STRING "Minimum OS X deployment version")
+else()
+    set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version")
+endif()
 
 cmake_minimum_required(VERSION 3.0)
 
@@ -14,10 +19,23 @@ endif()
 project(LPCNet C)
 
 option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF)
+
 option(AVX2 "Enable AVX2 CPU optimizations." OFF)
-option(AVX "Enable AVX CPU optimizations." ON)
-option(SSE "Enable SSE CPU optimizations." ON)
-option(NEON "Enable NEON CPU optimizations for RPi." ON)
+
+if(DISABLE_CPU_OPTIMIZATION)
+    option(AVX "Enable AVX CPU optimizations." OFF)
+    option(SSE "Enable SSE CPU optimizations." OFF)
+    option(NEON "Enable NEON CPU optimizations for RPi." OFF)
+    option(PPC_OPTS "Enable PowerPC CPU optimizations." OFF)
+else()
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+        option(PPC_OPTS "Enable PowerPC CPU optimizations." ON)
+    else()
+        option(AVX "Enable AVX CPU optimizations." ON)
+        option(SSE "Enable SSE CPU optimizations." ON)
+        option(NEON "Enable NEON CPU optimizations for RPi." ON)
+    endif()
+endif()
 
 include(GNUInstallDirs)
 mark_as_advanced(CLEAR
@@ -26,9 +44,13 @@ mark_as_advanced(CLEAR
     CMAKE_INSTALL_LIBDIR
 )
 
-# Build universal ARM64 and x86_64 binaries on Mac.
+# Build universal ARM64 and x86_64 binaries on Mac, unless on PowerPC.
 if(BUILD_OSX_UNIVERSAL)
-set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+    set(CMAKE_OSX_ARCHITECTURES "ppc;ppc64")
+  else()
+    set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
+  endif()
 endif(BUILD_OSX_UNIVERSAL)
 
 #
@@ -93,36 +115,46 @@ message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}")
 # Detection of available CPU optimizations
 if(NOT DISABLE_CPU_OPTIMIZATION)
     if(UNIX AND NOT APPLE)
-        message(STATUS "Looking for available CPU optimizations on Linux/BSD system...")
-        execute_process(COMMAND grep -c "avx2" /proc/cpuinfo
-            OUTPUT_VARIABLE AVX2_PRESENT)
-        execute_process(COMMAND grep -c "avx " /proc/cpuinfo
-            OUTPUT_VARIABLE AVX_PRESENT)
-        execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
-            OUTPUT_VARIABLE SSE_PRESENT)
-        execute_process(COMMAND grep -c "neon" /proc/cpuinfo
-            OUTPUT_VARIABLE NEON_PRESENT)
-    elseif(APPLE)
-        if(BUILD_OSX_UNIVERSAL)
-            # Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending
-            # on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64.
-            set(AVX_PRESENT TRUE)
-            set(SSE_PRESENT TRUE)
-            set(NEON_PRESENT TRUE)
+        if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+            message(STATUS "PowerPC processor detected.")
+            set(PPC_PRESENT TRUE)
         else()
-            # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
-            message(STATUS "Looking for available CPU optimizations on an OSX system...")
-            execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
+            message(STATUS "Looking for available CPU optimizations on Linux/BSD system...")
+            execute_process(COMMAND grep -c "avx2" /proc/cpuinfo
                 OUTPUT_VARIABLE AVX2_PRESENT)
-            execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
+            execute_process(COMMAND grep -c "avx " /proc/cpuinfo
                 OUTPUT_VARIABLE AVX_PRESENT)
-            execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1
+            execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
                 OUTPUT_VARIABLE SSE_PRESENT)
+            execute_process(COMMAND grep -c "neon" /proc/cpuinfo
+                OUTPUT_VARIABLE NEON_PRESENT)
+        endif()
+    elseif(APPLE)
+        if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+            message(STATUS "PowerPC processor detected.")
+            set(PPC_PRESENT TRUE)
+        else()
+            if(BUILD_OSX_UNIVERSAL)
+                # Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending
+                # on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64.
+                set(AVX_PRESENT TRUE)
+                set(SSE_PRESENT TRUE)
+                set(NEON_PRESENT TRUE)
+            else()
+                # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
+                message(STATUS "Looking for available CPU optimizations on an OSX system...")
+                execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
+                    OUTPUT_VARIABLE AVX2_PRESENT)
+                execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
+                    OUTPUT_VARIABLE AVX_PRESENT)
+                execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1
+                    OUTPUT_VARIABLE SSE_PRESENT)
                 
-            # Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs 
-            # available. We don't need any specific compiler flags for this, though.
-            set(NEON_PRESENT TRUE)
-        endif(BUILD_OSX_UNIVERSAL)
+                # Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs 
+                # available. We don't need any specific compiler flags for this, though.
+                set(NEON_PRESENT TRUE)
+            endif(BUILD_OSX_UNIVERSAL)
+        endif()
     elseif(WIN32)
         message(STATUS "No detection capability on Windows, assuming AVX is available.")
         set(AVX_PRESENT TRUE)
@@ -140,21 +172,29 @@ endif()
 
 set(LPCNET_C_PROC_FLAGS "")
 
-if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
-    message(STATUS "avx2 processor flags found or enabled.")
-    set(LPCNET_C_PROC_FLAGS -mavx2 -mfma)
-elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
-# AVX2 machines will also match on AVX
-    message(STATUS "avx processor flags found or enabled.")
-    set(LPCNET_C_PROC_FLAGS -mavx)
-elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
-# AVX and AVX2 machines will also match on SSE
-    message(STATUS "sse processor flags found or enabled.")
-    set(LPCNET_C_PROC_FLAGS -msse4.1)
-elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0))
-    # RPi / ARM 32bit
-    message(STATUS "neon processor flags found or enabled.")
-    set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+    if(${PPC_OPTS} AND (${PPC_PRESENT} OR ${PPC_PRESENT} GREATER 0))
+        # PowerPC
+        message(STATUS "PowerPC processor flags found or enabled.")
+        set(LPCNET_C_PROC_FLAGS "-mcpu=native -mtune=native")
+    endif()
+else()
+    if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
+        message(STATUS "avx2 processor flags found or enabled.")
+        set(LPCNET_C_PROC_FLAGS -mavx2 -mfma)
+    elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
+        # AVX2 machines will also match on AVX
+        message(STATUS "avx processor flags found or enabled.")
+        set(LPCNET_C_PROC_FLAGS -mavx)
+    elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
+        # AVX and AVX2 machines will also match on SSE
+        message(STATUS "sse processor flags found or enabled.")
+        set(LPCNET_C_PROC_FLAGS -msse4.1)
+    elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0))
+        # RPi / ARM 32bit
+        message(STATUS "neon processor flags found or enabled.")
+        set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
+    endif()
 endif()
 
 # grab latest NN model (or substitute your own)
diff --git src/CMakeLists.txt src/CMakeLists.txt
index 7dcb181..72f3765 100644
--- src/CMakeLists.txt
+++ src/CMakeLists.txt
@@ -72,20 +72,24 @@ target_link_libraries(dump_data lpcnetfreedv m)
 add_executable(test_lpcnet test_lpcnet.c nnet_rw.c)
 target_link_libraries(test_lpcnet lpcnetfreedv m)
 
-if(
-    (${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) OR
-    (${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) OR
-    (${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) OR
-    (${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) OR
-    CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
-    add_executable(test_vec test_vec.c)
-    target_link_libraries(test_vec m)
-    
-    if(LPCNET_C_PROC_FLAGS)
-        set_source_files_properties(test_vec.c PROPERTIES COMPILE_FLAGS ${LPCNET_C_PROC_FLAGS})
-    endif(LPCNET_C_PROC_FLAGS)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
+    message(WARNING "Tests not implemented for PowerPC, not building test_vec.")
 else()
-    message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.")
+    if(
+        (${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) OR
+        (${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) OR
+        (${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) OR
+        (${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) OR
+        CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+        add_executable(test_vec test_vec.c)
+        target_link_libraries(test_vec m)
+
+        if(LPCNET_C_PROC_FLAGS)
+            set_source_files_properties(test_vec.c PROPERTIES COMPILE_FLAGS ${LPCNET_C_PROC_FLAGS})
+        endif(LPCNET_C_PROC_FLAGS)
+    else()
+        message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.")
+    endif()
 endif()
 
 add_executable(quant_feat quant_feat.c)
