@@ -465,6 +465,114 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr
465
465
ret void
466
466
}
467
467
468
+ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic (ptr addrspace (1 ) %out ) #5 { ; Canonicalizes a constant f32 denormal under attribute group #5 ("denormal-fp-math-f32"="dynamic,dynamic"); every check prefix below expects a real v_mul_f32 / v_max_f32 on the constant rather than a pre-folded value.
469
+ ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
470
+ ; GFX678: ; %bb.0:
471
+ ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
472
+ ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
473
+ ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
474
+ ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
475
+ ; GFX678-NEXT: v_mov_b32_e32 v0, s0
476
+ ; GFX678-NEXT: v_mov_b32_e32 v1, s1
477
+ ; GFX678-NEXT: flat_store_dword v[0:1], v2
478
+ ; GFX678-NEXT: s_endpgm
479
+ ;
480
+ ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
481
+ ; GFX9: ; %bb.0:
482
+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
483
+ ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
484
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
485
+ ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
486
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
487
+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
488
+ ; GFX9-NEXT: s_endpgm
489
+ ;
490
+ ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
491
+ ; GFX11: ; %bb.0:
492
+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
493
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
494
+ ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
495
+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
496
+ ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
497
+ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
498
+ ; GFX11-NEXT: s_endpgm
499
+ %canonicalized = call float @llvm.canonicalize.f32 (float bitcast (i32 8388607 to float )) ; 8388607 == 0x7fffff (zero exponent field, all mantissa bits set), the literal seen in the expected instructions above.
500
+ store float %canonicalized , ptr addrspace (1 ) %out
501
+ ret void
502
+ }
503
+
504
+ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out (ptr addrspace (1 ) %out ) #6 { ; Canonicalizes a constant f32 denormal under attribute group #6 ("denormal-fp-math-f32"="dynamic,ieee"); the LangRef orders the pair as output mode, input mode, so only the output mode is dynamic here. Every check prefix below expects a real v_mul_f32 / v_max_f32 on the constant rather than a pre-folded value.
505
+ ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
506
+ ; GFX678: ; %bb.0:
507
+ ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
508
+ ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
509
+ ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
510
+ ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
511
+ ; GFX678-NEXT: v_mov_b32_e32 v0, s0
512
+ ; GFX678-NEXT: v_mov_b32_e32 v1, s1
513
+ ; GFX678-NEXT: flat_store_dword v[0:1], v2
514
+ ; GFX678-NEXT: s_endpgm
515
+ ;
516
+ ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
517
+ ; GFX9: ; %bb.0:
518
+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
519
+ ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
520
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
521
+ ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
522
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
523
+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
524
+ ; GFX9-NEXT: s_endpgm
525
+ ;
526
+ ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
527
+ ; GFX11: ; %bb.0:
528
+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
529
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
530
+ ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
531
+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
532
+ ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
533
+ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
534
+ ; GFX11-NEXT: s_endpgm
535
+ %canonicalized = call float @llvm.canonicalize.f32 (float bitcast (i32 8388607 to float )) ; 8388607 == 0x7fffff (zero exponent field, all mantissa bits set), the literal seen in the expected instructions above.
536
+ store float %canonicalized , ptr addrspace (1 ) %out
537
+ ret void
538
+ }
539
+
540
+ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in (ptr addrspace (1 ) %out ) #7 { ; Canonicalizes a constant f32 denormal under attribute group #7 ("denormal-fp-math-f32"="ieee,dynamic"); the LangRef orders the pair as output mode, input mode, so only the input mode is dynamic here. Every check prefix below expects a real v_mul_f32 / v_max_f32 on the constant rather than a pre-folded value.
541
+ ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
542
+ ; GFX678: ; %bb.0:
543
+ ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
544
+ ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
545
+ ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
546
+ ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
547
+ ; GFX678-NEXT: v_mov_b32_e32 v0, s0
548
+ ; GFX678-NEXT: v_mov_b32_e32 v1, s1
549
+ ; GFX678-NEXT: flat_store_dword v[0:1], v2
550
+ ; GFX678-NEXT: s_endpgm
551
+ ;
552
+ ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
553
+ ; GFX9: ; %bb.0:
554
+ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
555
+ ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
556
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 0
557
+ ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
558
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
559
+ ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
560
+ ; GFX9-NEXT: s_endpgm
561
+ ;
562
+ ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
563
+ ; GFX11: ; %bb.0:
564
+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
565
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
566
+ ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
567
+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
568
+ ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
569
+ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
570
+ ; GFX11-NEXT: s_endpgm
571
+ %canonicalized = call float @llvm.canonicalize.f32 (float bitcast (i32 8388607 to float )) ; 8388607 == 0x7fffff (zero exponent field, all mantissa bits set), the literal seen in the expected instructions above.
572
+ store float %canonicalized , ptr addrspace (1 ) %out
573
+ ret void
574
+ }
575
+
468
576
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32 (ptr addrspace (1 ) %out ) #3 {
469
577
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
470
578
; GFX678: ; %bb.0:
@@ -2400,3 +2508,6 @@ attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
2400
2508
attributes #2 = { nounwind "denormal-fp-math" ="preserve-sign,preserve-sign" }
2401
2509
attributes #3 = { nounwind "denormal-fp-math" ="ieee,ieee" }
2402
2510
attributes #4 = { nounwind "denormal-fp-math" ="preserve-sign,preserve-sign" }
2511
+ attributes #5 = { nounwind "denormal-fp-math-f32" ="dynamic,dynamic" } ; f32 only, both elements of the pair dynamic; used by the ..._f32_dynamic test
2512
+ attributes #6 = { nounwind "denormal-fp-math-f32" ="dynamic,ieee" } ; f32 only, first (output) element dynamic, second (input) element ieee; used by the ..._f32_dynamic_out test
2513
+ attributes #7 = { nounwind "denormal-fp-math-f32" ="ieee,dynamic" } ; f32 only, first (output) element ieee, second (input) element dynamic; used by the ..._f32_dynamic_in test
0 commit comments