# Locate the Apple frameworks the Metal backend links against.
# REQUIRED makes configuration fail immediately if any of them is missing.
find_library(FOUNDATION_LIBRARY
             NAMES Foundation
             REQUIRED)
find_library(METAL_FRAMEWORK
             NAMES Metal
             REQUIRED)
find_library(METALKIT_FRAMEWORK
             NAMES MetalKit
             REQUIRED)

# Only reached when all three lookups above succeeded.
message(STATUS "Metal framework found")

# Declare the ggml-metal backend library and its host-side sources
# (a mix of C++ and Objective-C translation units).
ggml_add_backend_library(
    ggml-metal
    ggml-metal.cpp
    ggml-metal-device.m
    ggml-metal-device.cpp
    ggml-metal-common.cpp
    ggml-metal-context.m
    ggml-metal-ops.cpp
)

# Link the frameworks located by the find_library() calls; PRIVATE keeps
# them out of the link interface of targets that consume ggml-metal.
target_link_libraries(
    ggml-metal
    PRIVATE
        ${FOUNDATION_LIBRARY}
        ${METAL_FRAMEWORK}
        ${METALKIT_FRAMEWORK}
)

# Forward the GGML_METAL_NDEBUG option to the backend sources as a
# preprocessor define. The define is attached to the ggml-metal target
# instead of the whole directory so it cannot leak into unrelated targets
# that may be added to this directory later.
if (GGML_METAL_NDEBUG)
    target_compile_definitions(ggml-metal PRIVATE GGML_METAL_NDEBUG)
endif()

# Absolute paths of the headers shared by the Metal kernels.
# ggml-common.h lives one directory above this backend.
set(METALLIB_COMMON
    "${CMAKE_CURRENT_SOURCE_DIR}/../ggml-common.h")

# Headers that sit next to the kernel sources in kernels/.
set(METALLIB_KERNELS_COMMON
    "${CMAKE_CURRENT_SOURCE_DIR}/kernels/common.h")
set(METALLIB_KERNELS_DEQUANTIZE
    "${CMAKE_CURRENT_SOURCE_DIR}/kernels/dequantize.h")
set(METALLIB_KERNELS_QUANTIZE
    "${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantize.h")

# Metal kernel translation units, given relative to this directory.
# Both the embedded and the precompiled (.metallib) build paths iterate over
# this list, so a new kernel file only has to be registered here.
# The order (left to right, top to bottom) is the order in which the files
# are processed.
set(METALLIB_KERNEL_SOURCES
    kernels/fa.metal              kernels/mul_mv.metal     kernels/mul_mm.metal   kernels/quantize.metal
    kernels/softmax.metal         kernels/norm.metal       kernels/unary.metal    kernels/binbcast.metal
    kernels/reduce.metal          kernels/tri.metal        kernels/ssm.metal      kernels/wkv.metal
    kernels/gated_delta_net.metal kernels/solve_tri.metal  kernels/rope.metal     kernels/conv.metal
    kernels/upscale.metal         kernels/argsort.metal    kernels/pool.metal     kernels/misc.metal
)

if (GGML_METAL_EMBED_LIBRARY)
    # The kernel sources are embedded through generated assembly stubs that
    # use .incbin, so the ASM language must be enabled.
    enable_language(ASM)

    # Tell the backend sources that the library is embedded. The define is
    # attached to the ggml-metal target rather than to the directory so it
    # stays scoped to the code that actually consumes it.
    target_compile_definitions(ggml-metal PRIVATE GGML_METAL_EMBED_LIBRARY)

    # Header that gets inlined in place of `#include "ggml-metal-impl.h"`.
    set(METALLIB_IMPL "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal-impl.h")

    # Output directory for the preprocessed kernels and the assembly stubs.
    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")

    # For every kernel source, generate a self-contained .metal file (shared
    # headers prepended, local includes inlined) plus an assembly stub that
    # embeds it via .incbin between _ggml_metallib_<kind>_start/_end symbols.
    set(METALLIB_EMBED_ASM_FILES "")
    foreach(src ${METALLIB_KERNEL_SOURCES})
        get_filename_component(kind ${src} NAME_WE)
        # symbol names must be valid C identifiers ('-' is not allowed)
        string(REPLACE "-" "_" kind_sym ${kind})

        set(SRC   "${CMAKE_CURRENT_SOURCE_DIR}/kernels/${kind}.metal")
        set(EMBED "${CMAKE_CURRENT_BINARY_DIR}/autogenerated/ggml-metal-embed-${kind}.metal")
        set(ASM   "${CMAKE_CURRENT_BINARY_DIR}/autogenerated/ggml-metal-embed-${kind}.s")

        # The header selection below inspects the kernel source at configure
        # time. Register the file as a configure dependency so that adding or
        # removing an #include re-runs CMake; otherwise the generated command
        # would keep using a stale header set.
        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${SRC}")

        # only prepend headers that this source actually includes
        # NOTE(review): only direct includes of the kernel are detected; if
        # one of the shared headers includes another one, that include is
        # stripped in step 2 without being prepended here - confirm the
        # headers do not include each other.
        set(HEADERS_FOR_SRC ${METALLIB_KERNELS_COMMON})
        file(STRINGS "${SRC}" _has_dequantize REGEX "#include \"dequantize\\.h\"")
        file(STRINGS "${SRC}" _has_quantize   REGEX "#include \"quantize\\.h\"")
        if(_has_dequantize)
            list(APPEND HEADERS_FOR_SRC ${METALLIB_KERNELS_DEQUANTIZE})
        endif()
        if(_has_quantize)
            list(APPEND HEADERS_FOR_SRC ${METALLIB_KERNELS_QUANTIZE})
        endif()

        add_custom_command(
            OUTPUT "${ASM}"
            # Files written as side effects; declaring them lets the
            # generator track and clean them.
            BYPRODUCTS "${EMBED}" "${EMBED}.tmp1" "${EMBED}.tmp2" "${EMBED}.tmp3"
            # Step 1: concatenate shared headers + this kernel source
            COMMAND cat ${HEADERS_FOR_SRC} ${SRC} > "${EMBED}.tmp1"
            # Step 2: remove internal #include and #pragma once
            COMMAND sed -e "/\#include \"common.h\"/d" -e "/\#include \"dequantize.h\"/d" -e "/\#include \"quantize.h\"/d" -e "/\#pragma once/d" < "${EMBED}.tmp1" > "${EMBED}.tmp2"
            # Step 3: inline ggml-common.h (replacing __embed_ggml-common.h__ sentinel)
            COMMAND sed -e "/__embed_ggml-common.h__/r ${METALLIB_COMMON}" -e "/__embed_ggml-common.h__/d" < "${EMBED}.tmp2" > "${EMBED}.tmp3"
            # Step 4: inline ggml-metal-impl.h
            COMMAND sed -e "/\#include \"ggml-metal-impl.h\"/r ${METALLIB_IMPL}" -e "/\#include \"ggml-metal-impl.h\"/d" < "${EMBED}.tmp3" > "${EMBED}"
            # Step 5: emit an asm chunk with kind-specific start/end symbols
            #   note: '-' is illegal in C symbols, so we use kind_sym; the macOS
            #   section name is limited to 16 chars so we keep it shared
            #   across kinds (__ggml_metallib) and only vary the global symbols.
            COMMAND echo ".section __DATA,__ggml_metallib"                       >  "${ASM}"
            COMMAND echo ".globl _ggml_metallib_${kind_sym}_start"               >> "${ASM}"
            COMMAND echo "_ggml_metallib_${kind_sym}_start:"                     >> "${ASM}"
            COMMAND echo .incbin "\"${EMBED}\""                                  >> "${ASM}"
            COMMAND echo ".globl _ggml_metallib_${kind_sym}_end"                 >> "${ASM}"
            COMMAND echo "_ggml_metallib_${kind_sym}_end:"                       >> "${ASM}"
            DEPENDS ../ggml-common.h ggml-metal-impl.h
                    kernels/common.h kernels/dequantize.h kernels/quantize.h
                    kernels/${kind}.metal
            COMMENT "Generate embedded Metal library for ${kind}"
            VERBATIM
        )

        list(APPEND METALLIB_EMBED_ASM_FILES "${ASM}")
    endforeach()

    # Assemble the stubs as part of the backend library.
    target_sources(ggml-metal PRIVATE ${METALLIB_EMBED_ASM_FILES})
else()
    # Stage the shared headers next to the binaries (configure-time copies).
    configure_file(../ggml-common.h  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h     COPYONLY)
    configure_file(ggml-metal-impl.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal-impl.h COPYONLY)

    # Mirror the kernels/ directory: first its headers, then every kernel
    # source, each keeping its path relative to this directory.
    file(MAKE_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/kernels")
    foreach(staged_file
            kernels/common.h
            kernels/dequantize.h
            kernels/quantize.h
            ${METALLIB_KERNEL_SOURCES})
        configure_file(${staged_file} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${staged_file} COPYONLY)
    endforeach()

    # Assemble the flags handed to the Metal compiler.
    if (NOT GGML_METAL_SHADER_DEBUG)
        # regular build: optimize
        set(XC_FLAGS -O3)
    else()
        # shader-debug build:
        #   -fno-fast-math : required for tests/test-backend-ops to pass
        #   -fno-inline    : fixes the tests when running with MTL_SHADER_VALIDATION=1
        #   (-g is deliberately not used: it causes a segmentation fault during compile)
        # note: the resulting library unfortunately has to be called default.metallib
        #       instead of ggml.metallib
        #       ref: https://github.com/ggml-org/whisper.cpp/issues/1720
        set(XC_FLAGS -fno-fast-math -fno-inline)
    endif()

    # Optional minimum macOS deployment target for the kernels.
    if (GGML_METAL_MACOSX_VERSION_MIN)
        message(STATUS "Adding  -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
        list(APPEND XC_FLAGS -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN})
    endif()

    # Optional Metal language standard.
    if (GGML_METAL_STD)
        message(STATUS "Adding  -std=${GGML_METAL_STD} flag to metal compilation")
        list(APPEND XC_FLAGS -std=${GGML_METAL_STD})
    endif()

    # Compile each kernel source to .air, then link into default.metallib.
    #
    # The kernels are compiled straight from the source tree. The link step
    # below removes the staged copies from the runtime output directory, and
    # those copies are only re-created when CMake re-configures. Compiling the
    # staged copies would therefore break any compile step that is re-run
    # without a re-configure (for example after a `clean`).
    set(AIR_FILES "")
    foreach(src ${METALLIB_KERNEL_SOURCES})
        get_filename_component(name ${src} NAME_WE)
        set(AIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${name}.air")
        list(APPEND AIR_FILES ${AIR})
        add_custom_command(
            OUTPUT ${AIR}
            # -I <this dir>    : provides ggml-metal-impl.h
            # -I <this dir>/.. : provides ggml-common.h
            # headers in kernels/ are found next to the including source
            COMMAND xcrun -sdk macosx metal ${XC_FLAGS}
                    -I ${CMAKE_CURRENT_SOURCE_DIR}
                    -I ${CMAKE_CURRENT_SOURCE_DIR}/..
                    -c ${CMAKE_CURRENT_SOURCE_DIR}/${src} -o ${AIR}
            DEPENDS ${src} kernels/common.h kernels/dequantize.h kernels/quantize.h ${METALLIB_COMMON} ggml-metal-impl.h
            COMMENT "Compiling ${src}"
            VERBATIM
        )
    endforeach()

    # Link all .air objects into a single library and drop the staged source
    # copies from the runtime output directory once the library exists.
    add_custom_command(
        OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
        COMMAND xcrun -sdk macosx metallib ${AIR_FILES} -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
        COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
        COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal-impl.h
        COMMAND rm -rf ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/kernels
        DEPENDS ${AIR_FILES}
        COMMENT "Linking Metal kernels into default.metallib"
        VERBATIM
    )

    # Build the library as part of the default target.
    add_custom_target(
        ggml-metal-lib ALL
        DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
    )
endif() # GGML_METAL_EMBED_LIBRARY

# Install rules for the non-embedded build only: with an embedded library
# there is no separate default.metallib to ship.
if (NOT GGML_METAL_EMBED_LIBRARY)
    # Kernel sources and headers from kernels/ (the trailing slash installs
    # the directory contents rather than the directory itself).
    install(DIRECTORY      ${CMAKE_CURRENT_SOURCE_DIR}/kernels/
            DESTINATION    ${CMAKE_INSTALL_BINDIR}/kernels
            FILES_MATCHING
                PATTERN "*.metal"
                PATTERN "*.h")

    # The precompiled Metal library.
    install(FILES       ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
            DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
