Index: src/aarch64/ffi.c
--- src/aarch64/ffi.c.orig
+++ src/aarch64/ffi.c
@@ -390,47 +390,59 @@ extend_hfa_type (void *dest, void *src, int h)
 	"adr	%0, 0f\n"
 "	add	%0, %0, %1\n"
 "	br	%0\n"
-"0:	ldp	s16, s17, [%3]\n"	/* S4 */
+"0:	bti	j\n"			/* S4 */
+"	ldp	s16, s17, [%3]\n"
 "	ldp	s18, s19, [%3, #8]\n"
 "	b	4f\n"
-"	ldp	s16, s17, [%3]\n"	/* S3 */
+"	bti	j\n"			/* S3 */
+"	ldp	s16, s17, [%3]\n"
 "	ldr	s18, [%3, #8]\n"
 "	b	3f\n"
-"	ldp	s16, s17, [%3]\n"	/* S2 */
+"	bti	j\n"			/* S2 */
+"	ldp	s16, s17, [%3]\n"
 "	b	2f\n"
 "	nop\n"
-"	ldr	s16, [%3]\n"		/* S1 */
+"	bti	j\n"			/* S1 */
+"	ldr	s16, [%3]\n"
 "	b	1f\n"
 "	nop\n"
-"	ldp	d16, d17, [%3]\n"	/* D4 */
+"	bti	j\n"			/* D4 */
+"	ldp	d16, d17, [%3]\n"
 "	ldp	d18, d19, [%3, #16]\n"
 "	b	4f\n"
-"	ldp	d16, d17, [%3]\n"	/* D3 */
+"	bti	j\n"			/* D3 */
+"	ldp	d16, d17, [%3]\n"
 "	ldr	d18, [%3, #16]\n"
 "	b	3f\n"
-"	ldp	d16, d17, [%3]\n"	/* D2 */
+"	bti	j\n"			/* D2 */
+"	ldp	d16, d17, [%3]\n"
 "	b	2f\n"
 "	nop\n"
-"	ldr	d16, [%3]\n"		/* D1 */
+"	bti	j\n"			/* D1 */
+"	ldr	d16, [%3]\n"
 "	b	1f\n"
 "	nop\n"
-"	ldp	q16, q17, [%3]\n"	/* Q4 */
+"	bti	j\n"			/* Q4 */
+"	ldp	q16, q17, [%3]\n"
 "	ldp	q18, q19, [%3, #32]\n"
 "	b	4f\n"
-"	ldp	q16, q17, [%3]\n"	/* Q3 */
+"	bti	j\n"			/* Q3 */
+"	ldp	q16, q17, [%3]\n"
 "	ldr	q18, [%3, #32]\n"
 "	b	3f\n"
-"	ldp	q16, q17, [%3]\n"	/* Q2 */
+"	bti	j\n"			/* Q2 */
+"	ldp	q16, q17, [%3]\n"
 "	b	2f\n"
 "	nop\n"
-"	ldr	q16, [%3]\n"		/* Q1 */
+"	bti	j\n"			/* Q1 */
+"	ldr	q16, [%3]\n"
 "	b	1f\n"
 "4:	str	q19, [%2, #48]\n"
 "3:	str	q18, [%2, #32]\n"
 "2:	str	q17, [%2, #16]\n"
 "1:	str	q16, [%2]"
     : "=&r"(x0)
-    : "r"(f * 12), "r"(dest), "r"(src)
+    : "r"(f * 16), "r"(dest), "r"(src)
     : "memory", "v16", "v17", "v18", "v19");
 }
 #endif
@@ -873,8 +885,9 @@ ffi_prep_closure_loc (ffi_closure *closure,
 # endif
 #else
   static const unsigned char trampoline[16] = {
-    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
-    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
+    0x5f, 0x24, 0x03, 0xd5,	/* bti	c		*/
+    0x70, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
+    0xd1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
     0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
   };
   char *tramp = closure->tramp;
