@@ -47,33 +47,38 @@ entry:
4747
4848; CHECK: define internal void @diffef(i8* %C, i8* %"C'", i8* %A, i8* %"A'", i8* %B, i8* %"B'")
4949; CHECK-NEXT: entry:
50- ; CHECK-NEXT: %ret = alloca double
51- ; CHECK-NEXT: %byref.transpose.transa = alloca i8
52- ; CHECK-NEXT: %byref.transpose.transb = alloca i8
53- ; CHECK-NEXT: %byref.int.one = alloca i64
54- ; CHECK-NEXT: %byref.constant.char.G = alloca i8
55- ; CHECK-NEXT: %byref.constant.int.0 = alloca i64
56- ; CHECK-NEXT: %[[byrefconstantint1:.+]] = alloca i64
57- ; CHECK-NEXT: %byref.constant.fp.1.0 = alloca double
58- ; CHECK-NEXT: %[[byrefconstantint2:.+]] = alloca i64
59- ; CHECK-NEXT: %transa = alloca i8, align 1
60- ; CHECK-NEXT: %transb = alloca i8, align 1
61- ; CHECK-NEXT: %m = alloca i64, align 16
62- ; CHECK-NEXT: %m_p = bitcast i64* %m to i8*
63- ; CHECK-NEXT: %n = alloca i64, align 16
64- ; CHECK-NEXT: %n_p = bitcast i64* %n to i8*
65- ; CHECK-NEXT: %k = alloca i64, align 16
66- ; CHECK-NEXT: %k_p = bitcast i64* %k to i8*
67- ; CHECK-NEXT: %alpha = alloca double, align 16
68- ; CHECK-NEXT: %alpha_p = bitcast double* %alpha to i8*
69- ; CHECK-NEXT: %lda = alloca i64, align 16
70- ; CHECK-NEXT: %lda_p = bitcast i64* %lda to i8*
71- ; CHECK-NEXT: %ldb = alloca i64, align 16
72- ; CHECK-NEXT: %ldb_p = bitcast i64* %ldb to i8*
73- ; CHECK-NEXT: %beta = alloca double, align 16
74- ; CHECK-NEXT: %beta_p = bitcast double* %beta to i8*
75- ; CHECK-NEXT: %ldc = alloca i64, align 16
76- ; CHECK-NEXT: %ldc_p = bitcast i64* %ldc to i8*
50+ ; CHECK-DAG: %ret = alloca double
51+ ; CHECK-DAG: %byref.transpose.transa = alloca i8
52+ ; CHECK-DAG: %byref.transpose.transb = alloca i8
53+ ; CHECK-DAG: %byref.int.one = alloca i64
54+ ; CHECK-DAG: %byref.constant.char.T = alloca i8, align 1
55+ ; CHECK-DAG: %byref.constant.char.N = alloca i8, align 1
56+ ; CHECK-DAG: %byref.constant.fp.1.0 = alloca double
57+ ; CHECK-DAG: %byref.constant.char.T2 = alloca i8, align 1
58+ ; CHECK-DAG: %byref.constant.char.N3 = alloca i8, align 1
59+ ; CHECK-DAG: %byref.constant.fp.1.06 = alloca double
60+ ; CHECK-DAG: %byref.constant.char.G = alloca i8
61+ ; CHECK-DAG: %byref.constant.int.0 = alloca i64
62+ ; CHECK-DAG: %[[byrefconstantint1:.+]] = alloca i64
63+ ; CHECK-DAG: %byref.constant.fp.1.010 = alloca double
64+ ; CHECK-DAG: %transa = alloca i8, align 1
65+ ; CHECK-DAG: %transb = alloca i8, align 1
66+ ; CHECK-DAG: %m = alloca i64, align 16
67+ ; CHECK-DAG: %m_p = bitcast i64* %m to i8*
68+ ; CHECK-DAG: %n = alloca i64, align 16
69+ ; CHECK-DAG: %n_p = bitcast i64* %n to i8*
70+ ; CHECK-DAG: %k = alloca i64, align 16
71+ ; CHECK-DAG: %k_p = bitcast i64* %k to i8*
72+ ; CHECK-DAG: %alpha = alloca double, align 16
73+ ; CHECK-DAG: %alpha_p = bitcast double* %alpha to i8*
74+ ; CHECK-DAG: %lda = alloca i64, align 16
75+ ; CHECK-DAG: %lda_p = bitcast i64* %lda to i8*
76+ ; CHECK-DAG: %ldb = alloca i64, align 16
77+ ; CHECK-DAG: %ldb_p = bitcast i64* %ldb to i8*
78+ ; CHECK-DAG: %beta = alloca double, align 16
79+ ; CHECK-DAG: %beta_p = bitcast double* %beta to i8*
80+ ; CHECK-DAG: %ldc = alloca i64, align 16
81+ ; CHECK-DAG: %ldc_p = bitcast i64* %ldc to i8*
7782; CHECK-NEXT: store i8 78, i8* %transa, align 1
7883; CHECK-NEXT: store i8 78, i8* %transb, align 1
7984; CHECK-NEXT: store i64 4, i64* %m, align 16
@@ -110,17 +115,56 @@ entry:
110115; CHECK-NEXT: store i8 %[[i25]], i8* %byref.transpose.transb
111116; CHECK-NEXT: store i64 1, i64* %byref.int.one
112117; CHECK-NEXT: %intcast.int.one = bitcast i64* %byref.int.one to i8*
113- ; CHECK-NEXT: call void @dgemm_64_(i8* %transa, i8* %byref.transpose.transb, i8* %m_p, i8* %k_p, i8* %n_p, i8* %alpha_p, i8* %"C'", i8* %ldc_p, i8* %B, i8* %ldb_p, i8* %beta_p, i8* %"A'", i8* %lda_p, i64 1, i64 1)
114- ; CHECK-NEXT: call void @dgemm_64_(i8* %byref.transpose.transa, i8* %transb, i8* %k_p, i8* %n_p, i8* %m_p, i8* %alpha_p, i8* %A, i8* %lda_p, i8* %"C'", i8* %ldc_p, i8* %beta_p, i8* %"B'", i8* %ldb_p, i64 1, i64 1)
118+
119+ ; CHECK-NEXT: store i8 84, i8* %byref.constant.char.T, align 1
120+ ; CHECK-NEXT: store i8 78, i8* %byref.constant.char.N, align 1
121+ ; CHECK-NEXT: %ld.row.trans = load i8, i8* %transa, align 1
122+ ; CHECK-NEXT: %[[a16:.+]] = icmp eq i8 %ld.row.trans, 110
123+ ; CHECK-NEXT: %[[a17:.+]] = icmp eq i8 %ld.row.trans, 78
124+ ; CHECK-NEXT: %[[a18:.+]] = or i1 %[[a17]], %[[a16]]
125+ ; CHECK-NEXT: %[[a19:.+]] = select i1 %[[a18]], i8* %byref.constant.char.N, i8* %transb
126+ ; CHECK-NEXT: %[[a20:.+]] = select i1 %[[a18]], i8* %byref.transpose.transb, i8* %byref.constant.char.T
127+ ; CHECK-NEXT: %[[a21:.+]] = select i1 %[[a18]], i8* %m_p, i8* %k_p
128+ ; CHECK-NEXT: %[[a22:.+]] = select i1 %[[a18]], i8* %k_p, i8* %m_p
129+ ; CHECK-NEXT: %ld.row.trans1 = load i8, i8* %transa, align 1
130+ ; CHECK-NEXT: %[[a23:.+]] = icmp eq i8 %ld.row.trans1, 110
131+ ; CHECK-NEXT: %[[a24:.+]] = icmp eq i8 %ld.row.trans1, 78
132+ ; CHECK-NEXT: %[[a25:.+]] = or i1 %[[a24]], %[[a23]]
133+ ; CHECK-NEXT: %[[a26:.+]] = select i1 %[[a25]], i8* %"C'", i8* %B
134+ ; CHECK-NEXT: %[[a27:.+]] = select i1 %[[a25]], i8* %ldc_p, i8* %ldb_p
135+ ; CHECK-NEXT: %[[a28:.+]] = select i1 %[[a25]], i8* %B, i8* %"C'"
136+ ; CHECK-NEXT: %[[a29:.+]] = select i1 %[[a25]], i8* %ldb_p, i8* %ldc_p
137+ ; CHECK-NEXT: store double 1.000000e+00, double* %byref.constant.fp.1.0, align 8
138+ ; CHECK-NEXT: %fpcast.constant.fp.1.0 = bitcast double* %byref.constant.fp.1.0 to i8*
139+ ; CHECK-NEXT: call void @dgemm_64_(i8* %[[a19]], i8* %[[a20]], i8* %[[a21]], i8* %[[a22]], i8* %n_p, i8* %alpha_p, i8* %[[a26]], i8* %[[a27]], i8* %[[a28]], i8* %[[a29]], i8* %fpcast.constant.fp.1.0, i8* %"A'", i8* %lda_p, i64 1, i64 1)
140+ ; CHECK-NEXT: store i8 84, i8* %byref.constant.char.T2, align 1
141+ ; CHECK-NEXT: store i8 78, i8* %byref.constant.char.N3, align 1
142+ ; CHECK-NEXT: %ld.row.trans4 = load i8, i8* %transb, align 1
143+ ; CHECK-NEXT: %[[a30:.+]] = icmp eq i8 %ld.row.trans4, 110
144+ ; CHECK-NEXT: %[[a31:.+]] = icmp eq i8 %ld.row.trans4, 78
145+ ; CHECK-NEXT: %[[a32:.+]] = or i1 %[[a31]], %[[a30]]
146+ ; CHECK-NEXT: %[[a33:.+]] = select i1 %[[a32]], i8* %byref.transpose.transa, i8* %byref.constant.char.T2
147+ ; CHECK-NEXT: %[[a34:.+]] = select i1 %[[a32]], i8* %byref.constant.char.N3, i8* %transa
148+ ; CHECK-NEXT: %[[a35:.+]] = select i1 %[[a32]], i8* %k_p, i8* %n_p
149+ ; CHECK-NEXT: %[[a36:.+]] = select i1 %[[a32]], i8* %n_p, i8* %k_p
150+ ; CHECK-NEXT: %ld.row.trans5 = load i8, i8* %transb, align 1
151+ ; CHECK-NEXT: %[[a37:.+]] = icmp eq i8 %ld.row.trans5, 110
152+ ; CHECK-NEXT: %[[a38:.+]] = icmp eq i8 %ld.row.trans5, 78
153+ ; CHECK-NEXT: %[[a39:.+]] = or i1 %[[a38]], %[[a37]]
154+ ; CHECK-NEXT: %[[a40:.+]] = select i1 %[[a39]], i8* %A, i8* %"C'"
155+ ; CHECK-NEXT: %[[a41:.+]] = select i1 %[[a39]], i8* %lda_p, i8* %ldc_p
156+ ; CHECK-NEXT: %[[a42:.+]] = select i1 %[[a39]], i8* %"C'", i8* %A
157+ ; CHECK-NEXT: %[[a43:.+]] = select i1 %[[a39]], i8* %ldc_p, i8* %lda_p
158+ ; CHECK-NEXT: store double 1.000000e+00, double* %byref.constant.fp.1.06, align 8
159+ ; CHECK-NEXT: %fpcast.constant.fp.1.07 = bitcast double* %byref.constant.fp.1.06 to i8*
160+ ; CHECK-NEXT: call void @dgemm_64_(i8* %[[a33]], i8* %[[a34]], i8* %[[a35]], i8* %[[a36]], i8* %m_p, i8* %alpha_p, i8* %[[a40]], i8* %[[a41]], i8* %[[a42]], i8* %[[a43]], i8* %fpcast.constant.fp.1.07, i8* %"B'", i8* %ldb_p, i64 1, i64 1)
115161; CHECK-NEXT: store i8 71, i8* %byref.constant.char.G
116162; CHECK-NEXT: store i64 0, i64* %byref.constant.int.0
117163; CHECK-NEXT: %intcast.constant.int.0 = bitcast i64* %byref.constant.int.0 to i8*
118164; CHECK-NEXT: store i64 0, i64* %[[byrefconstantint1]]
119- ; CHECK-NEXT: %intcast.constant.int.02 = bitcast i64* %byref.constant.int.01 to i8*
165+ ; CHECK-NEXT: %[[int02:.+]] = bitcast i64* %[[byrefconstantint1]] to i8*
120166; CHECK-NEXT: store double 1.000000e+00, double* %byref.constant.fp.1.0
121- ; CHECK-NEXT: %fpcast.constant.fp.1.0 = bitcast double* %byref.constant.fp.1.0 to i8*
122- ; CHECK-NEXT: store i64 0, i64* %[[byrefconstantint2]]
123- ; CHECK-NEXT: %intcast.constant.int.04 = bitcast i64* %byref.constant.int.03 to i8*
124- ; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %intcast.constant.int.02, i8* %fpcast.constant.fp.1.0, i8* %beta_p, i8* %m_p, i8* %n_p, i8* %"C'", i8* %ldc_p, i8* %intcast.constant.int.04)
167+ ; CHECK-NEXT: %[[fp11:.+]] = bitcast double* %byref.constant.fp.1.010 to i8*
168+ ; CHECK-NEXT: call void @dlascl_64_(i8* %byref.constant.char.G, i8* %intcast.constant.int.0, i8* %[[int02]], i8* %[[fp11]], i8* %beta_p, i8* %m_p, i8* %n_p, i8* %"C'", i8* %ldc_p, i64 1)
125169; CHECK-NEXT: ret void
126170; CHECK-NEXT: }
0 commit comments