; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; RUN: igc_opt --igc-scalarize -S < %s | FileCheck %s
; ------------------------------------------------
; ScalarizeFunction
; ------------------------------------------------
; The pass should break each vector operation into multiple scalar operations
; ------------------------------------------------

; Baseline case: a <2 x i32> add whose result is stored to a vector alloca.
; Expected output: both operands are split with extractelement, one scalar add
; is emitted per lane, and the lanes are reassembled with insertelement so the
; vector store can still consume a <2 x i32> value.
define spir_kernel void @basic(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @basic(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]], <2 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT3]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = add <2 x i32> %src1, %src2
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; Metadata case: the vector add carries an !any_metadata attachment.
; Expected output: every scalar add produced from it carries the same
; attachment, and both refer to the same metadata node (matched as META0 and
; verified against !{i32 0} at the end of the file).
define spir_kernel void @should_preserve_metadata(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @should_preserve_metadata(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]], <2 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]], !any_metadata [[META0:![0-9]+]]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]], !any_metadata [[META0]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT3]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = add <2 x i32> %src1, %src2, !any_metadata !{i32 0}
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; Floating-point case: same shape as the baseline test, but the vector
; operation is an fadd on <2 x float>.
; Expected output: one scalar fadd per lane, reassembled into <2 x float>.
define spir_kernel void @should_work_with_different_instruction_type(<2 x float> %src1, <2 x float> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_different_instruction_type(
; CHECK-SAME: <2 x float> [[SRC1:%.*]], <2 x float> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x float> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x float> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x float> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x float> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x float>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x float> [[DOTASSEMBLED_VECT]], float [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x float> [[DOTASSEMBLED_VECT3]], <2 x float>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x float>
  %2 = fadd <2 x float> %src1, %src2
  store <2 x float> %2, <2 x float>* %1
  ret void
}

; Exact-flag case: the vector udiv is marked `exact`.
; Expected output: the `exact` flag is carried over to each scalar udiv.
; The assertions below use the same label + signature layout as the other
; functions in this file, so the kernel signature is verified here as well.
define spir_kernel void @should_work_with_exact_flag(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_exact_flag(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]], <2 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = udiv exact i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = udiv exact i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT3]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = udiv exact <2 x i32> %src1, %src2
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; Fast-math case: the vector fadd is marked `fast`.
; Expected output: each scalar fadd produced from it keeps the `fast` flag.
define spir_kernel void @should_work_with_fast_math_flags(<2 x float> %src1, <2 x float> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_fast_math_flags(
; CHECK-SAME: <2 x float> [[SRC1:%.*]], <2 x float> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x float> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x float> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x float> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x float> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x float>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast float [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x float> [[DOTASSEMBLED_VECT]], float [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x float> [[DOTASSEMBLED_VECT3]], <2 x float>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x float>
  %2 = fadd fast <2 x float> %src1, %src2
  store <2 x float> %2, <2 x float>* %1
  ret void
}

; Wider-element case: the vector add operates on <2 x i64> instead of i32.
; Expected output: scalar i64 adds per lane; the alloca and store of the
; reassembled vector are printed with align 16.
define spir_kernel void @should_work_with_different_value_type(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_different_value_type(
; CHECK-SAME: <2 x i64> [[SRC1:%.*]], <2 x i64> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i64> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i64> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i64> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i64> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i64>, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i64> [[DOTASSEMBLED_VECT]], i64 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i64> [[DOTASSEMBLED_VECT3]], <2 x i64>* [[TMP1]], align 16
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i64>
  %2 = add <2 x i64> %src1, %src2
  store <2 x i64> %2, <2 x i64>* %1
  ret void
}

; Wide-vector case: the vector add operates on <16 x i32>.
; Expected output: 16 extractelement instructions per operand (lanes 0..15),
; 16 scalar adds pairing matching lanes, and a chain of 16 insertelement
; instructions that rebuilds the <16 x i32> value consumed by the store.
define spir_kernel void @should_work_with_larger_vector_size(<16 x i32> %src1, <16 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_larger_vector_size(
; CHECK-SAME: <16 x i32> [[SRC1:%.*]], <16 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <16 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR16:%.*]] = extractelement <16 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC2_SCALAR17:%.*]] = extractelement <16 x i32> [[SRC2]], i32 2
; CHECK-NEXT:    [[SRC2_SCALAR18:%.*]] = extractelement <16 x i32> [[SRC2]], i32 3
; CHECK-NEXT:    [[SRC2_SCALAR19:%.*]] = extractelement <16 x i32> [[SRC2]], i32 4
; CHECK-NEXT:    [[SRC2_SCALAR20:%.*]] = extractelement <16 x i32> [[SRC2]], i32 5
; CHECK-NEXT:    [[SRC2_SCALAR21:%.*]] = extractelement <16 x i32> [[SRC2]], i32 6
; CHECK-NEXT:    [[SRC2_SCALAR22:%.*]] = extractelement <16 x i32> [[SRC2]], i32 7
; CHECK-NEXT:    [[SRC2_SCALAR23:%.*]] = extractelement <16 x i32> [[SRC2]], i32 8
; CHECK-NEXT:    [[SRC2_SCALAR24:%.*]] = extractelement <16 x i32> [[SRC2]], i32 9
; CHECK-NEXT:    [[SRC2_SCALAR25:%.*]] = extractelement <16 x i32> [[SRC2]], i32 10
; CHECK-NEXT:    [[SRC2_SCALAR26:%.*]] = extractelement <16 x i32> [[SRC2]], i32 11
; CHECK-NEXT:    [[SRC2_SCALAR27:%.*]] = extractelement <16 x i32> [[SRC2]], i32 12
; CHECK-NEXT:    [[SRC2_SCALAR28:%.*]] = extractelement <16 x i32> [[SRC2]], i32 13
; CHECK-NEXT:    [[SRC2_SCALAR29:%.*]] = extractelement <16 x i32> [[SRC2]], i32 14
; CHECK-NEXT:    [[SRC2_SCALAR30:%.*]] = extractelement <16 x i32> [[SRC2]], i32 15
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <16 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <16 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR2:%.*]] = extractelement <16 x i32> [[SRC1]], i32 2
; CHECK-NEXT:    [[SRC1_SCALAR3:%.*]] = extractelement <16 x i32> [[SRC1]], i32 3
; CHECK-NEXT:    [[SRC1_SCALAR4:%.*]] = extractelement <16 x i32> [[SRC1]], i32 4
; CHECK-NEXT:    [[SRC1_SCALAR5:%.*]] = extractelement <16 x i32> [[SRC1]], i32 5
; CHECK-NEXT:    [[SRC1_SCALAR6:%.*]] = extractelement <16 x i32> [[SRC1]], i32 6
; CHECK-NEXT:    [[SRC1_SCALAR7:%.*]] = extractelement <16 x i32> [[SRC1]], i32 7
; CHECK-NEXT:    [[SRC1_SCALAR8:%.*]] = extractelement <16 x i32> [[SRC1]], i32 8
; CHECK-NEXT:    [[SRC1_SCALAR9:%.*]] = extractelement <16 x i32> [[SRC1]], i32 9
; CHECK-NEXT:    [[SRC1_SCALAR10:%.*]] = extractelement <16 x i32> [[SRC1]], i32 10
; CHECK-NEXT:    [[SRC1_SCALAR11:%.*]] = extractelement <16 x i32> [[SRC1]], i32 11
; CHECK-NEXT:    [[SRC1_SCALAR12:%.*]] = extractelement <16 x i32> [[SRC1]], i32 12
; CHECK-NEXT:    [[SRC1_SCALAR13:%.*]] = extractelement <16 x i32> [[SRC1]], i32 13
; CHECK-NEXT:    [[SRC1_SCALAR14:%.*]] = extractelement <16 x i32> [[SRC1]], i32 14
; CHECK-NEXT:    [[SRC1_SCALAR15:%.*]] = extractelement <16 x i32> [[SRC1]], i32 15
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <16 x i32>, align 64
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR16]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[SRC1_SCALAR2]], [[SRC2_SCALAR17]]
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SRC1_SCALAR3]], [[SRC2_SCALAR18]]
; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SRC1_SCALAR4]], [[SRC2_SCALAR19]]
; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SRC1_SCALAR5]], [[SRC2_SCALAR20]]
; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[SRC1_SCALAR6]], [[SRC2_SCALAR21]]
; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[SRC1_SCALAR7]], [[SRC2_SCALAR22]]
; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[SRC1_SCALAR8]], [[SRC2_SCALAR23]]
; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[SRC1_SCALAR9]], [[SRC2_SCALAR24]]
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[SRC1_SCALAR10]], [[SRC2_SCALAR25]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[SRC1_SCALAR11]], [[SRC2_SCALAR26]]
; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[SRC1_SCALAR12]], [[SRC2_SCALAR27]]
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[SRC1_SCALAR13]], [[SRC2_SCALAR28]]
; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[SRC1_SCALAR14]], [[SRC2_SCALAR29]]
; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[SRC1_SCALAR15]], [[SRC2_SCALAR30]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT31:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    [[DOTASSEMBLED_VECT32:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT31]], i32 [[TMP4]], i32 2
; CHECK-NEXT:    [[DOTASSEMBLED_VECT33:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT32]], i32 [[TMP5]], i32 3
; CHECK-NEXT:    [[DOTASSEMBLED_VECT34:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT33]], i32 [[TMP6]], i32 4
; CHECK-NEXT:    [[DOTASSEMBLED_VECT35:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT34]], i32 [[TMP7]], i32 5
; CHECK-NEXT:    [[DOTASSEMBLED_VECT36:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT35]], i32 [[TMP8]], i32 6
; CHECK-NEXT:    [[DOTASSEMBLED_VECT37:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT36]], i32 [[TMP9]], i32 7
; CHECK-NEXT:    [[DOTASSEMBLED_VECT38:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT37]], i32 [[TMP10]], i32 8
; CHECK-NEXT:    [[DOTASSEMBLED_VECT39:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT38]], i32 [[TMP11]], i32 9
; CHECK-NEXT:    [[DOTASSEMBLED_VECT40:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT39]], i32 [[TMP12]], i32 10
; CHECK-NEXT:    [[DOTASSEMBLED_VECT41:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT40]], i32 [[TMP13]], i32 11
; CHECK-NEXT:    [[DOTASSEMBLED_VECT42:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT41]], i32 [[TMP14]], i32 12
; CHECK-NEXT:    [[DOTASSEMBLED_VECT43:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT42]], i32 [[TMP15]], i32 13
; CHECK-NEXT:    [[DOTASSEMBLED_VECT44:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT43]], i32 [[TMP16]], i32 14
; CHECK-NEXT:    [[DOTASSEMBLED_VECT45:%.*]] = insertelement <16 x i32> [[DOTASSEMBLED_VECT44]], i32 [[TMP17]], i32 15
; CHECK-NEXT:    store <16 x i32> [[DOTASSEMBLED_VECT45]], <16 x i32>* [[TMP1]], align 64
; CHECK-NEXT:    ret void
;
  %1 = alloca <16 x i32>
  %2 = add <16 x i32> %src1, %src2
  store <16 x i32> %2, <16 x i32>* %1
  ret void
}

; Shift case: the vector operation is a shl on <2 x i32>.
; Expected output: one scalar shl per lane, with lane i of %src1 shifted by
; lane i of %src2, then reassembled into <2 x i32> for the store.
define spir_kernel void @should_work_with_bit_wise_instruction(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_bit_wise_instruction(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]], <2 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT3]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = shl <2 x i32> %src1, %src2
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; Constant-operand case: the second operand of the vector add is the constant
; vector <i32 2, i32 4>.
; Expected output: only %src1 is split with extractelement; the constant lanes
; appear directly as the immediate operands 2 and 4 of the scalar adds.
define spir_kernel void @should_work_with_constant_value(<2 x i32> %src1) {
; CHECK-LABEL: define spir_kernel void @should_work_with_constant_value(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]]) {
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[SRC1_SCALAR]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SRC1_SCALAR1]], 4
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT2:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT2]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = add <2 x i32> %src1, <i32 2, i32 4>
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; Wrap-flag case: the vector add is marked `nuw nsw`.
; Expected output: each scalar add produced from it keeps both flags.
define spir_kernel void @should_work_with_nuw_nsw(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-LABEL: define spir_kernel void @should_work_with_nuw_nsw(
; CHECK-SAME: <2 x i32> [[SRC1:%.*]], <2 x i32> [[SRC2:%.*]]) {
; CHECK-NEXT:    [[SRC2_SCALAR:%.*]] = extractelement <2 x i32> [[SRC2]], i32 0
; CHECK-NEXT:    [[SRC2_SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2]], i32 1
; CHECK-NEXT:    [[SRC1_SCALAR:%.*]] = extractelement <2 x i32> [[SRC1]], i32 0
; CHECK-NEXT:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x i32> [[SRC1]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x i32>, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[SRC1_SCALAR]], [[SRC2_SCALAR]]
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[SRC1_SCALAR1]], [[SRC2_SCALAR2]]
; CHECK-NEXT:    [[DOTASSEMBLED_VECT:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT:    [[DOTASSEMBLED_VECT3:%.*]] = insertelement <2 x i32> [[DOTASSEMBLED_VECT]], i32 [[TMP3]], i32 1
; CHECK-NEXT:    store <2 x i32> [[DOTASSEMBLED_VECT3]], <2 x i32>* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %1 = alloca <2 x i32>
  %2 = add nuw nsw <2 x i32> %src1, %src2
  store <2 x i32> %2, <2 x i32>* %1
  ret void
}

; CHECK: [[META0]] = !{i32 0}