Skip to content

Commit 8964316

Browse files
filter: add scale_d3d11 support for MF encoders
1 parent 5bdeb59 commit 8964316

16 files changed, with 17344 additions and 4 deletions.

contrib/ffmpeg/A32-avfilter-avfilter-Add-FFFilter-hide-internals-of-AVF.patch

Lines changed: 15972 additions & 0 deletions
Large diffs are not rendered by default.

contrib/ffmpeg/A33-avfilter-add-scale_d3d11-filter.patch

Lines changed: 563 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
From d56522c6eb754cd5c648bc0c0b39bd4fbd77c47c Mon Sep 17 00:00:00 2001
2+
From: Dash Santosh <dash.sathyanarayanan@multicorewareinc.com>
3+
Date: Thu, 22 May 2025 05:34:37 -0700
4+
Subject: [PATCH 02/13] avcodec/mfenc: add support for D3D11 input surfaces
5+
6+
Adds D3D11 input surface support to the MediaFoundation encoder (mfenc),
7+
allowing direct encoding of GPU frames without readback to system memory.
8+
This improves performance and compatibility when used alongside scale_d3d11.
9+
10+
Signed-off-by: Dash Santosh <dash.sathyanarayanan@multicorewareinc.com>
11+
---
12+
libavcodec/mf_utils.h | 7 ++
13+
libavcodec/mfenc.c | 207 ++++++++++++++++++++++++++++++++++++------
14+
2 files changed, 185 insertions(+), 29 deletions(-)
15+
16+
diff --git a/libavcodec/mf_utils.h b/libavcodec/mf_utils.h
17+
index a59b36d015..ecebb6fcdf 100644
18+
--- a/libavcodec/mf_utils.h
19+
+++ b/libavcodec/mf_utils.h
20+
@@ -53,6 +53,13 @@ typedef struct MFFunctions {
21+
IMFMediaBuffer **ppBuffer);
22+
HRESULT (WINAPI *MFCreateSample) (IMFSample **ppIMFSample);
23+
HRESULT (WINAPI *MFCreateMediaType) (IMFMediaType **ppMFType);
24+
+ HRESULT (WINAPI *MFCreateDXGISurfaceBuffer) (REFIID riid,
25+
+ IUnknown* punkSurface,
26+
+ UINT uSubresourceIndex,
27+
+ BOOL fBottomUpWhenLinear,
28+
+ IMFMediaBuffer** ppBuffer);
29+
+ HRESULT (WINAPI *MFCreateDXGIDeviceManager) (UINT* resetToken,
30+
+ IMFDXGIDeviceManager** ppDeviceManager);
31+
// MFTEnumEx is missing in Windows Vista's mfplat.dll.
32+
HRESULT (WINAPI *MFTEnumEx)(GUID guidCategory, UINT32 Flags,
33+
const MFT_REGISTER_TYPE_INFO *pInputType,
34+
diff --git a/libavcodec/mfenc.c b/libavcodec/mfenc.c
35+
index c9e2191fde..912984c306 100644
36+
--- a/libavcodec/mfenc.c
37+
+++ b/libavcodec/mfenc.c
38+
@@ -31,10 +31,18 @@
39+
#include "codec_internal.h"
40+
#include "internal.h"
41+
#include "compat/w32dlfcn.h"
42+
+#if CONFIG_D3D11VA
43+
+#include "libavutil/hwcontext_d3d11va.h"
44+
+#endif
45+
46+
typedef struct MFContext {
47+
AVClass *av_class;
48+
HMODULE library;
49+
+ HMODULE d3d_dll;
50+
+ ID3D11DeviceContext* d3d_context;
51+
+ IMFDXGIDeviceManager *dxgiManager;
52+
+ int resetToken;
53+
+
54+
MFFunctions functions;
55+
AVFrame *frame;
56+
int is_video, is_audio;
57+
@@ -47,6 +55,7 @@ typedef struct MFContext {
58+
int out_stream_provides_samples;
59+
int draining, draining_done;
60+
int sample_sent;
61+
+ int stream_started;
62+
int async_need_input, async_have_output, async_marker;
63+
int64_t reorder_delay;
64+
ICodecAPI *codec_api;
65+
@@ -55,6 +64,7 @@ typedef struct MFContext {
66+
int opt_enc_quality;
67+
int opt_enc_scenario;
68+
int opt_enc_hw;
69+
+ AVD3D11VADeviceContext* device_hwctx;
70+
} MFContext;
71+
72+
static int mf_choose_output_type(AVCodecContext *avctx);
73+
@@ -303,36 +313,118 @@ static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *f
74+
return sample;
75+
}
76+
77+
-static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
78+
+static int initialize_dxgi_manager(AVCodecContext *avctx)
79+
{
80+
MFContext *c = avctx->priv_data;
81+
- IMFSample *sample;
82+
- IMFMediaBuffer *buffer;
83+
- BYTE *data;
84+
+ MFFunctions *func = &c->functions;
85+
HRESULT hr;
86+
- int ret;
87+
- int size;
88+
+
89+
+ hr = func->MFCreateDXGIDeviceManager(&c->resetToken, &c->dxgiManager);
90+
+ if (FAILED(hr)) {
91+
+ av_log(avctx, AV_LOG_ERROR, "Failed to create DXGI device manager: %s\n", ff_hr_str(hr));
92+
+ return AVERROR_EXTERNAL;
93+
+ }
94+
+
95+
+ hr = IMFDXGIDeviceManager_ResetDevice(c->dxgiManager, c->device_hwctx->device, c->resetToken);
96+
+ if (FAILED(hr)) {
97+
+ av_log(avctx, AV_LOG_ERROR, "Failed to reset device: %s\n", ff_hr_str(hr));
98+
+ return AVERROR_EXTERNAL;
99+
+ }
100+
+
101+
+ hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)c->dxgiManager);
102+
+ if (FAILED(hr)) {
103+
+ av_log(avctx, AV_LOG_ERROR, "Failed to set D3D manager: %s\n", ff_hr_str(hr));
104+
+ return AVERROR_EXTERNAL;
105+
+ }
106+
+
107+
+ return 0;
108+
+}
109+
+
110+
+static int process_d3d11_frame(AVCodecContext *avctx, const AVFrame *frame, IMFSample **out_sample)
111+
+{
112+
+ MFContext *c = avctx->priv_data;
113+
+ MFFunctions *func = &c->functions;
114+
+ AVHWFramesContext *frames_ctx = NULL;
115+
+ ID3D11Texture2D *d3d11_texture = NULL;
116+
+ IMFSample *sample = NULL;
117+
+ IMFMediaBuffer *buffer = NULL;
118+
+ int subIdx = 0;
119+
+ HRESULT hr;
120+
+
121+
+ frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
122+
+ c->device_hwctx = (AVD3D11VADeviceContext*)frames_ctx->device_ctx->hwctx;
123+
+
124+
+ if (!c->dxgiManager) {
125+
+ hr = initialize_dxgi_manager(avctx);
126+
+ if (FAILED(hr)) {
127+
+ return AVERROR_EXTERNAL;
128+
+ }
129+
+ }
130+
+
131+
+ d3d11_texture = (ID3D11Texture2D*)frame->data[0];
132+
+ subIdx = (int)(intptr_t)frame->data[1];
133+
+
134+
+ if (!d3d11_texture) {
135+
+ av_log(avctx, AV_LOG_ERROR, "D3D11 texture not found\n");
136+
+ return AVERROR(EINVAL);
137+
+ }
138+
+
139+
+ hr = func->MFCreateSample(&sample);
140+
+ if (FAILED(hr)) {
141+
+ av_log(avctx, AV_LOG_ERROR, "Failed to create MF sample: %s\n", ff_hr_str(hr));
142+
+ return AVERROR_EXTERNAL;
143+
+ }
144+
+
145+
+ hr = func->MFCreateDXGISurfaceBuffer(&IID_ID3D11Texture2D, d3d11_texture, subIdx, 0, &buffer);
146+
+ if (FAILED(hr)) {
147+
+ av_log(avctx, AV_LOG_ERROR, "Failed to create DXGI surface buffer: %s\n", ff_hr_str(hr));
148+
+ IMFSample_Release(sample);
149+
+ return AVERROR_EXTERNAL;
150+
+ }
151+
+
152+
+ hr = IMFSample_AddBuffer(sample, buffer);
153+
+ if (FAILED(hr)) {
154+
+ av_log(avctx, AV_LOG_ERROR, "Failed to add buffer to sample: %s\n", ff_hr_str(hr));
155+
+ IMFMediaBuffer_Release(buffer);
156+
+ IMFSample_Release(sample);
157+
+ return AVERROR_EXTERNAL;
158+
+ }
159+
+
160+
+ IMFMediaBuffer_Release(buffer);
161+
+
162+
+ *out_sample = sample;
163+
+ return 0;
164+
+}
165+
+
166+
+static int process_software_frame(AVCodecContext *avctx, const AVFrame *frame, IMFSample **out_sample)
167+
+{
168+
+ MFContext *c = avctx->priv_data;
169+
+ IMFSample *sample = NULL;
170+
+ IMFMediaBuffer *buffer = NULL;
171+
+ BYTE *data = NULL;
172+
+ HRESULT hr;
173+
+ int size, ret;
174+
175+
size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
176+
if (size < 0)
177+
- return NULL;
178+
+ return AVERROR_EXTERNAL;
179+
180+
sample = ff_create_memory_sample(&c->functions, NULL, size,
181+
c->in_info.cbAlignment);
182+
if (!sample)
183+
- return NULL;
184+
+ return AVERROR_EXTERNAL;
185+
186+
hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
187+
if (FAILED(hr)) {
188+
IMFSample_Release(sample);
189+
- return NULL;
190+
+ return AVERROR_EXTERNAL;
191+
}
192+
193+
hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
194+
if (FAILED(hr)) {
195+
IMFMediaBuffer_Release(buffer);
196+
IMFSample_Release(sample);
197+
- return NULL;
198+
+ return AVERROR_EXTERNAL;
199+
}
200+
201+
ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
202+
@@ -342,10 +434,43 @@ static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *f
203+
IMFMediaBuffer_Release(buffer);
204+
if (ret < 0) {
205+
IMFSample_Release(sample);
206+
- return NULL;
207+
+ return AVERROR_EXTERNAL;
208+
}
209+
210+
IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->duration));
211+
+ *out_sample = sample;
212+
+
213+
+ return 0;
214+
+}
215+
+
216+
+static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
217+
+{
218+
+ MFContext *c = avctx->priv_data;
219+
+ MFFunctions *func = &c->functions;
220+
+ IMFSample *sample = NULL;
221+
+ IMFMediaBuffer *buffer = NULL;
222+
+ HRESULT hr;
223+
+ int ret;
224+
+
225+
+ if (frame->format == AV_PIX_FMT_D3D11) {
226+
+ // Handle D3D11 hardware frames
227+
+ ret = process_d3d11_frame(avctx, frame, &sample);
228+
+ if (ret < 0) {
229+
+ return NULL;
230+
+ }
231+
+ } else {
232+
+ // Handle software frames
233+
+ ret = process_software_frame(avctx, frame, &sample);
234+
+ if (ret < 0) {
235+
+ return NULL;
236+
+ }
237+
+ }
238+
+
239+
+ // Set sample duration
240+
+ hr = IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->duration));
241+
+ if (FAILED(hr)) {
242+
+ av_log(avctx, AV_LOG_WARNING, "Failed to set sample duration: %s\n", ff_hr_str(hr));
243+
+ }
244+
245+
return sample;
246+
}
247+
@@ -511,6 +636,23 @@ static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
248+
}
249+
}
250+
251+
+ if(!c->stream_started)
252+
+ {
253+
+ HRESULT hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
254+
+ if (FAILED(hr)) {
255+
+ av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
256+
+ return AVERROR(EBADMSG);
257+
+ }
258+
+
259+
+ hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
260+
+ if (FAILED(hr)) {
261+
+ av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
262+
+ return AVERROR(EBADMSG);
263+
+ }
264+
+
265+
+ c->stream_started = 1;
266+
+ }
267+
+
268+
ret = mf_send_sample(avctx, sample);
269+
if (sample)
270+
IMFSample_Release(sample);
271+
@@ -727,8 +869,16 @@ static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
272+
static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
273+
{
274+
enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
275+
- if (pix_fmt != avctx->pix_fmt)
276+
- return -1; // can not use
277+
+
278+
+ if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
279+
+ if (pix_fmt != AV_PIX_FMT_NV12) {
280+
+ return -1; // can not use
281+
+ }
282+
+ }
283+
+ else {
284+
+ if (pix_fmt != avctx->pix_fmt)
285+
+ return -1; // can not use
286+
+ }
287+
288+
return 0;
289+
}
290+
@@ -736,9 +886,16 @@ static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
291+
static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
292+
{
293+
enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
294+
- if (pix_fmt != avctx->pix_fmt) {
295+
- av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
296+
- return AVERROR(EINVAL);
297+
+ if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
298+
+ if (pix_fmt != AV_PIX_FMT_NV12 && pix_fmt != AV_PIX_FMT_D3D11) {
299+
+ av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
300+
+ return AVERROR(EINVAL);
301+
+ }
302+
+ } else {
303+
+ if (pix_fmt != avctx->pix_fmt) {
304+
+ av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
305+
+ return AVERROR(EINVAL);
306+
+ }
307+
}
308+
309+
//ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
310+
@@ -1106,18 +1263,6 @@ static int mf_init_encoder(AVCodecContext *avctx)
311+
if ((ret = mf_setup_context(avctx)) < 0)
312+
return ret;
313+
314+
- hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
315+
- if (FAILED(hr)) {
316+
- av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
317+
- return AVERROR_EXTERNAL;
318+
- }
319+
-
320+
- hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
321+
- if (FAILED(hr)) {
322+
- av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
323+
- return AVERROR_EXTERNAL;
324+
- }
325+
-
326+
if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
327+
c->is_video && !avctx->extradata) {
328+
int sleep = 10000, total = 0;
329+
@@ -1175,6 +1320,7 @@ static int mf_load_library(AVCodecContext *avctx)
330+
331+
#if !HAVE_UWP
332+
c->library = dlopen("mfplat.dll", 0);
333+
+ c->d3d_dll = dlopen("D3D11.dll", 0);
334+
335+
if (!c->library) {
336+
av_log(c, AV_LOG_ERROR, "DLL mfplat.dll failed to open\n");
337+
@@ -1187,6 +1333,8 @@ static int mf_load_library(AVCodecContext *avctx)
338+
LOAD_MF_FUNCTION(c, MFCreateAlignedMemoryBuffer);
339+
LOAD_MF_FUNCTION(c, MFCreateSample);
340+
LOAD_MF_FUNCTION(c, MFCreateMediaType);
341+
+ LOAD_MF_FUNCTION(c, MFCreateDXGISurfaceBuffer);
342+
+ LOAD_MF_FUNCTION(c, MFCreateDXGIDeviceManager);
343+
// MFTEnumEx is missing in Windows Vista's mfplat.dll.
344+
LOAD_MF_FUNCTION(c, MFTEnumEx);
345+
346+
@@ -1208,6 +1356,7 @@ static int mf_close(AVCodecContext *avctx)
347+
ff_free_mf(&c->functions, &c->mft);
348+
349+
dlclose(c->library);
350+
+ dlclose(c->d3d_dll);
351+
c->library = NULL;
352+
#else
353+
ff_free_mf(&c->functions, &c->mft);
354+
@@ -1300,7 +1449,7 @@ static const FFCodecDefault defaults[] = {
355+
};
356+
357+
#define VFMTS \
358+
- CODEC_PIXFMTS(AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P),
359+
+ CODEC_PIXFMTS(AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_D3D11),
360+
#define VCAPS \
361+
.p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID | \
362+
AV_CODEC_CAP_DR1,
363+
--
364+
2.43.0.windows.1
365+

0 commit comments

Comments
 (0)