Tuesday, June 13, 2023

graphics/ffmpeg: patch and remove USE_NOBTCFI=Yes

Index: Makefile
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v
retrieving revision 1.233
diff -u -p -r1.233 Makefile
--- Makefile 12 Jun 2023 22:58:00 -0000 1.233
+++ Makefile 13 Jun 2023 07:58:36 -0000
@@ -3,12 +3,10 @@ COMMENT= audio/video converter and strea
V= 4.4.4
DISTNAME= ffmpeg-${V}
EPOCH= 1
-REVISION= 0
+REVISION= 1
CATEGORIES= graphics multimedia
MASTER_SITES= https://ffmpeg.org/releases/
EXTRACT_SUFX= .tar.xz
-
-USE_NOBTCFI= Yes

SHARED_LIBS= avcodec 25.0 \
avdevice 13.0 \
Index: patches/patch-libavcodec_x86_fft_asm
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v
retrieving revision 1.1
diff -u -p -r1.1 patch-libavcodec_x86_fft_asm
--- patches/patch-libavcodec_x86_fft_asm 24 Jan 2023 14:13:12 -0000 1.1
+++ patches/patch-libavcodec_x86_fft_asm 13 Jun 2023 07:58:36 -0000
@@ -1,7 +1,90 @@
+- place a table in rodata
+- use _CET_ENDBR in functions header
+
Index: libavcodec/x86/fft.asm
--- libavcodec/x86/fft.asm.orig
+++ libavcodec/x86/fft.asm
-@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3
+@@ -325,6 +325,7 @@ INIT_YMM avx
+ %if HAVE_AVX_EXTERNAL
+ align 16
+ fft8_avx:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T8_AVX m0, m1, m2, m3, m4
+@@ -335,6 +336,7 @@ fft8_avx:
+
+ align 16
+ fft16_avx:
++ _CET_ENDBR
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T4_SSE m2, m3, m7
+@@ -372,6 +374,7 @@ fft16_avx:
+
+ align 16
+ fft32_avx:
++ _CET_ENDBR
+ call fft16_avx
+
+ mova m0, Z(4)
+@@ -396,6 +399,7 @@ fft32_avx:
+ ret
+
+ fft32_interleave_avx:
++ _CET_ENDBR
+ call fft32_avx
+ mov r2d, 32
+ .deint_loop:
+@@ -419,6 +423,7 @@ INIT_XMM sse
+ align 16
+ fft4_avx:
+ fft4_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -428,6 +433,7 @@ fft4_sse:
+
+ align 16
+ fft8_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -442,6 +448,7 @@ fft8_sse:
+
+ align 16
+ fft16_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -465,6 +472,7 @@ fft16_sse:
+ %macro FFT48_3DNOW 0
+ align 16
+ fft4 %+ SUFFIX:
++ _CET_ENDBR
+ T2_3DNOW m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+@@ -479,6 +487,7 @@ fft4 %+ SUFFIX:
+
+ align 16
+ fft8 %+ SUFFIX:
++ _CET_ENDBR
+ T2_3DNOW m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+@@ -532,6 +541,7 @@ FFT48_3DNOW
+ %macro DECL_PASS 2+ ; name, payload
+ align 16
+ %1:
++ _CET_ENDBR
+ DEFINE_ARGS zc, w, n, o1, o3
+ lea o3q, [nq*3]
+ lea o1q, [nq*8]
+@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3
%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
lea r2, [dispatch_tab%1]
mov r2, [r2 + (%2q-2)*gprsize]
@@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm
call r2
%endmacro ; FFT_DISPATCH

-@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
%define pass_interleave_3dnowext pass_interleave_3dnow
%endif

@@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm

%macro DECL_FFT 1-2 ; nbits, suffix
%ifidn %0, 1
-@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix:
+@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+
+ align 16
+ fft %+ n %+ fullsuffix:
++ _CET_ENDBR
+ call fft %+ n2 %+ SUFFIX
+ add r0, n*4 - (n&(-2<<%1))
+ call fft %+ n4 %+ SUFFIX
+@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix:
%endrep
%undef n

Index: patches/patch-libavutil_x86_x86inc_asm
===================================================================
RCS file: patches/patch-libavutil_x86_x86inc_asm
diff -N patches/patch-libavutil_x86_x86inc_asm
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-libavutil_x86_x86inc_asm 13 Jun 2023 07:58:36 -0000
@@ -0,0 +1,27 @@
+- define _CET_ENDBR depending X86_64 / X86
+- use it in cglobal macro
+
+Index: libavutil/x86/x86inc.asm
+--- libavutil/x86/x86inc.asm.orig
++++ libavutil/x86/x86inc.asm
+@@ -53,6 +53,12 @@
+ %endif
+ %endif
+
++%if ARCH_X86_64
++ %define _CET_ENDBR endbr64
++%else
++ %define _CET_ENDBR
++%endif
++
+ %define WIN64 0
+ %define UNIX64 0
+ %if ARCH_X86_64
+@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg,
+ %endif
+ align function_align
+ %2:
++ _CET_ENDBR
+ RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
+ %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+ %assign stack_offset 0 ; stack pointer offset relative to the return address
Hi,

We have marked graphics/ffmpeg (and some ports using it) with USE_NOBTCFI=Yes as
the library doesn't have proper function annotation regarding IBT on amd64.

But the list of ports using ffmpeg as a library is huge:

$ show-reverse-deps -v graphics/ffmpeg | grep LIB_DEPENDS | wc -l
217

The following patch makes the ffmpeg functions begin with the `endbr64'
instruction, and the ffmpeg testsuite passes on an IBT machine.

I added a _CET_ENDBR define (the name follows the <cet.h> convention) because some
code is shared between amd64 and i386, and only amd64 should get the `endbr64'
instruction (I haven't tested it on i386; I only checked that the %else branch compiles).

Most of the ffmpeg functions are declared through a macro (cglobal), so
I just added _CET_ENDBR at the beginning of it.

The testsuite showed that libavcodec/x86/fft.asm has several functions that do not
use `cglobal', plus a jump table that selects between fft implementations
(depending on cpuflags). I added _CET_ENDBR there too.

This should let us avoid marking a large portion of the ports tree with
USE_NOBTCFI=Yes.

Comments or OK ?
--
Sebastien Marie

Index: Makefile
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/Makefile,v
retrieving revision 1.233
diff -u -p -r1.233 Makefile
--- Makefile 12 Jun 2023 22:58:00 -0000 1.233
+++ Makefile 13 Jun 2023 07:58:36 -0000
@@ -3,12 +3,10 @@ COMMENT= audio/video converter and strea
V= 4.4.4
DISTNAME= ffmpeg-${V}
EPOCH= 1
-REVISION= 0
+REVISION= 1
CATEGORIES= graphics multimedia
MASTER_SITES= https://ffmpeg.org/releases/
EXTRACT_SUFX= .tar.xz
-
-USE_NOBTCFI= Yes

SHARED_LIBS= avcodec 25.0 \
avdevice 13.0 \
Index: patches/patch-libavcodec_x86_fft_asm
===================================================================
RCS file: /cvs/ports/graphics/ffmpeg/patches/patch-libavcodec_x86_fft_asm,v
retrieving revision 1.1
diff -u -p -r1.1 patch-libavcodec_x86_fft_asm
--- patches/patch-libavcodec_x86_fft_asm 24 Jan 2023 14:13:12 -0000 1.1
+++ patches/patch-libavcodec_x86_fft_asm 13 Jun 2023 07:58:36 -0000
@@ -1,7 +1,90 @@
+- place a table in rodata
+- use _CET_ENDBR in functions header
+
Index: libavcodec/x86/fft.asm
--- libavcodec/x86/fft.asm.orig
+++ libavcodec/x86/fft.asm
-@@ -548,10 +548,6 @@ DEFINE_ARGS zc, w, n, o1, o3
+@@ -325,6 +325,7 @@ INIT_YMM avx
+ %if HAVE_AVX_EXTERNAL
+ align 16
+ fft8_avx:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T8_AVX m0, m1, m2, m3, m4
+@@ -335,6 +336,7 @@ fft8_avx:
+
+ align 16
+ fft16_avx:
++ _CET_ENDBR
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T4_SSE m2, m3, m7
+@@ -372,6 +374,7 @@ fft16_avx:
+
+ align 16
+ fft32_avx:
++ _CET_ENDBR
+ call fft16_avx
+
+ mova m0, Z(4)
+@@ -396,6 +399,7 @@ fft32_avx:
+ ret
+
+ fft32_interleave_avx:
++ _CET_ENDBR
+ call fft32_avx
+ mov r2d, 32
+ .deint_loop:
+@@ -419,6 +423,7 @@ INIT_XMM sse
+ align 16
+ fft4_avx:
+ fft4_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -428,6 +433,7 @@ fft4_sse:
+
+ align 16
+ fft8_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -442,6 +448,7 @@ fft8_sse:
+
+ align 16
+ fft16_sse:
++ _CET_ENDBR
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+@@ -465,6 +472,7 @@ fft16_sse:
+ %macro FFT48_3DNOW 0
+ align 16
+ fft4 %+ SUFFIX:
++ _CET_ENDBR
+ T2_3DNOW m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+@@ -479,6 +487,7 @@ fft4 %+ SUFFIX:
+
+ align 16
+ fft8 %+ SUFFIX:
++ _CET_ENDBR
+ T2_3DNOW m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+@@ -532,6 +541,7 @@ FFT48_3DNOW
+ %macro DECL_PASS 2+ ; name, payload
+ align 16
+ %1:
++ _CET_ENDBR
+ DEFINE_ARGS zc, w, n, o1, o3
+ lea o3q, [nq*3]
+ lea o1q, [nq*8]
+@@ -548,10 +558,6 @@ DEFINE_ARGS zc, w, n, o1, o3
%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
lea r2, [dispatch_tab%1]
mov r2, [r2 + (%2q-2)*gprsize]
@@ -12,7 +95,7 @@ Index: libavcodec/x86/fft.asm
call r2
%endmacro ; FFT_DISPATCH

-@@ -731,11 +727,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+@@ -731,11 +737,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
%define pass_interleave_3dnowext pass_interleave_3dnow
%endif

@@ -24,7 +107,15 @@ Index: libavcodec/x86/fft.asm

%macro DECL_FFT 1-2 ; nbits, suffix
%ifidn %0, 1
-@@ -773,8 +765,10 @@ fft %+ n %+ fullsuffix:
+@@ -759,6 +761,7 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
+
+ align 16
+ fft %+ n %+ fullsuffix:
++ _CET_ENDBR
+ call fft %+ n2 %+ SUFFIX
+ add r0, n*4 - (n&(-2<<%1))
+ call fft %+ n4 %+ SUFFIX
+@@ -773,8 +776,10 @@ fft %+ n %+ fullsuffix:
%endrep
%undef n

Index: patches/patch-libavutil_x86_x86inc_asm
===================================================================
RCS file: patches/patch-libavutil_x86_x86inc_asm
diff -N patches/patch-libavutil_x86_x86inc_asm
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-libavutil_x86_x86inc_asm 13 Jun 2023 07:58:36 -0000
@@ -0,0 +1,27 @@
+- define _CET_ENDBR depending X86_64 / X86
+- use it in cglobal macro
+
+Index: libavutil/x86/x86inc.asm
+--- libavutil/x86/x86inc.asm.orig
++++ libavutil/x86/x86inc.asm
+@@ -53,6 +53,12 @@
+ %endif
+ %endif
+
++%if ARCH_X86_64
++ %define _CET_ENDBR endbr64
++%else
++ %define _CET_ENDBR
++%endif
++
+ %define WIN64 0
+ %define UNIX64 0
+ %if ARCH_X86_64
+@@ -746,6 +752,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg,
+ %endif
+ align function_align
+ %2:
++ _CET_ENDBR
+ RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
+ %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+ %assign stack_offset 0 ; stack pointer offset relative to the return address

No comments:

Post a Comment