|
| 1 | +From d56522c6eb754cd5c648bc0c0b39bd4fbd77c47c Mon Sep 17 00:00:00 2001 |
| 2 | +From: Dash Santosh <dash.sathyanarayanan@multicorewareinc.com> |
| 3 | +Date: Thu, 22 May 2025 05:34:37 -0700 |
| 4 | +Subject: [PATCH 02/13] avcodec/mfenc: add support for D3D11 input surfaces |
| 5 | + |
| 6 | +Adds D3D11 input surface support to the MediaFoundation encoder (mfenc), |
| 7 | +allowing direct encoding of GPU frames without readback to system memory. |
| 8 | +This improves performance and compatibility when used alongside scale_d3d11. |
| 9 | + |
| 10 | +Signed-off-by: Dash Santosh <dash.sathyanarayanan@multicorewareinc.com> |
| 11 | +--- |
| 12 | + libavcodec/mf_utils.h | 7 ++ |
| 13 | + libavcodec/mfenc.c | 207 ++++++++++++++++++++++++++++++++++++------ |
| 14 | + 2 files changed, 185 insertions(+), 29 deletions(-) |
| 15 | + |
| 16 | +diff --git a/libavcodec/mf_utils.h b/libavcodec/mf_utils.h |
| 17 | +index a59b36d015..ecebb6fcdf 100644 |
| 18 | +--- a/libavcodec/mf_utils.h |
| 19 | ++++ b/libavcodec/mf_utils.h |
| 20 | +@@ -53,6 +53,13 @@ typedef struct MFFunctions { |
| 21 | + IMFMediaBuffer **ppBuffer); |
| 22 | + HRESULT (WINAPI *MFCreateSample) (IMFSample **ppIMFSample); |
| 23 | + HRESULT (WINAPI *MFCreateMediaType) (IMFMediaType **ppMFType); |
| 24 | ++ HRESULT (WINAPI *MFCreateDXGISurfaceBuffer) (REFIID riid, |
| 25 | ++ IUnknown* punkSurface, |
| 26 | ++ UINT uSubresourceIndex, |
| 27 | ++ BOOL fBottomUpWhenLinear, |
| 28 | ++ IMFMediaBuffer** ppBuffer); |
| 29 | ++ HRESULT (WINAPI *MFCreateDXGIDeviceManager) (UINT* resetToken, |
| 30 | ++ IMFDXGIDeviceManager** ppDeviceManager); |
| 31 | + // MFTEnumEx is missing in Windows Vista's mfplat.dll. |
| 32 | + HRESULT (WINAPI *MFTEnumEx)(GUID guidCategory, UINT32 Flags, |
| 33 | + const MFT_REGISTER_TYPE_INFO *pInputType, |
| 34 | +diff --git a/libavcodec/mfenc.c b/libavcodec/mfenc.c |
| 35 | +index c9e2191fde..912984c306 100644 |
| 36 | +--- a/libavcodec/mfenc.c |
| 37 | ++++ b/libavcodec/mfenc.c |
| 38 | +@@ -31,10 +31,18 @@ |
| 39 | + #include "codec_internal.h" |
| 40 | + #include "internal.h" |
| 41 | + #include "compat/w32dlfcn.h" |
| 42 | ++#if CONFIG_D3D11VA |
| 43 | ++#include "libavutil/hwcontext_d3d11va.h" |
| 44 | ++#endif |
| 45 | + |
| 46 | + typedef struct MFContext { |
| 47 | + AVClass *av_class; |
| 48 | + HMODULE library; |
| 49 | ++ HMODULE d3d_dll; |
| 50 | ++ ID3D11DeviceContext* d3d_context; |
| 51 | ++ IMFDXGIDeviceManager *dxgiManager; |
| 52 | ++ int resetToken; |
| 53 | ++ |
| 54 | + MFFunctions functions; |
| 55 | + AVFrame *frame; |
| 56 | + int is_video, is_audio; |
| 57 | +@@ -47,6 +55,7 @@ typedef struct MFContext { |
| 58 | + int out_stream_provides_samples; |
| 59 | + int draining, draining_done; |
| 60 | + int sample_sent; |
| 61 | ++ int stream_started; |
| 62 | + int async_need_input, async_have_output, async_marker; |
| 63 | + int64_t reorder_delay; |
| 64 | + ICodecAPI *codec_api; |
| 65 | +@@ -55,6 +64,7 @@ typedef struct MFContext { |
| 66 | + int opt_enc_quality; |
| 67 | + int opt_enc_scenario; |
| 68 | + int opt_enc_hw; |
| 69 | ++ AVD3D11VADeviceContext* device_hwctx; |
| 70 | + } MFContext; |
| 71 | + |
| 72 | + static int mf_choose_output_type(AVCodecContext *avctx); |
| 73 | +@@ -303,36 +313,118 @@ static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *f |
| 74 | + return sample; |
| 75 | + } |
| 76 | + |
| 77 | +-static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame) |
| 78 | ++/* Create the DXGI device manager for c->device_hwctx->device and attach it to the MFT; returns 0 or AVERROR_EXTERNAL. */ |
| 79 | ++static int initialize_dxgi_manager(AVCodecContext *avctx) |
| 80 | + { |
| 81 | +     MFContext *c = avctx->priv_data; |
| 82 | +-    IMFSample *sample; |
| 83 | +-    IMFMediaBuffer *buffer; |
| 84 | +-    BYTE *data; |
| 85 | ++    MFFunctions *func = &c->functions; |
| 86 | +     HRESULT hr; |
| 87 | +-    int ret; |
| 88 | +-    int size; |
| 89 | ++ |
| 90 | ++    hr = func->MFCreateDXGIDeviceManager((UINT *)&c->resetToken, &c->dxgiManager); |
| 91 | ++    if (FAILED(hr)) { |
| 92 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to create DXGI device manager: %s\n", ff_hr_str(hr)); |
| 93 | ++        return AVERROR_EXTERNAL; |
| 94 | ++    } |
| 95 | ++    hr = IMFDXGIDeviceManager_ResetDevice(c->dxgiManager, (IUnknown *)c->device_hwctx->device, c->resetToken); |
| 96 | ++    if (SUCCEEDED(hr)) { |
| 97 | ++        hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)c->dxgiManager); |
| 98 | ++        if (SUCCEEDED(hr)) |
| 99 | ++            return 0; |
| 100 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to set D3D manager: %s\n", ff_hr_str(hr)); |
| 101 | ++    } else { |
| 102 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to reset device: %s\n", ff_hr_str(hr)); |
| 103 | ++    } |
| 104 | ++    /* Drop the half-initialised manager so that a later call starts from scratch instead of reusing it. */ |
| 105 | ++    IMFDXGIDeviceManager_Release(c->dxgiManager); |
| 106 | ++    c->dxgiManager = NULL; |
| 107 | ++    return AVERROR_EXTERNAL; |
| 108 | ++} |
| 109 | ++ |
| 110 | ++/* Wrap the D3D11 texture referenced by frame in a new IMFSample. |
| 111 | ++ * Stores the sample in *out_sample and returns 0, or returns a negative AVERROR code. */ |
| 112 | ++static int process_d3d11_frame(AVCodecContext *avctx, const AVFrame *frame, IMFSample **out_sample) |
| 113 | ++{ |
| 114 | ++    MFContext *c = avctx->priv_data; |
| 115 | ++    MFFunctions *func = &c->functions; |
| 116 | ++    AVHWFramesContext *frames_ctx; |
| 117 | ++    ID3D11Texture2D *d3d11_texture; |
| 118 | ++    IMFSample *sample = NULL; |
| 119 | ++    IMFMediaBuffer *buffer = NULL; |
| 120 | ++    int subIdx; |
| 121 | ++    HRESULT hr; |
| 122 | ++ |
| 123 | ++    // The D3D11 device is reached through the frames context, so it must be present. |
| 124 | ++    if (!frame->hw_frames_ctx) { |
| 125 | ++        av_log(avctx, AV_LOG_ERROR, "D3D11 frame has no hw_frames_ctx\n"); |
| 126 | ++        return AVERROR(EINVAL); |
| 127 | ++    } |
| 128 | ++    frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data; |
| 129 | ++    c->device_hwctx = (AVD3D11VADeviceContext*)frames_ctx->device_ctx->hwctx; |
| 130 | ++ |
| 131 | ++    // Create and attach the DXGI device manager the first time it is needed. |
| 132 | ++    if (!c->dxgiManager && initialize_dxgi_manager(avctx) < 0) |
| 133 | ++        return AVERROR_EXTERNAL; |
| 134 | ++ |
| 135 | ++    d3d11_texture = (ID3D11Texture2D*)frame->data[0]; |
| 136 | ++    subIdx = (int)(intptr_t)frame->data[1]; // passed on as the subresource index |
| 137 | ++    if (!d3d11_texture) { |
| 138 | ++        av_log(avctx, AV_LOG_ERROR, "D3D11 texture not found\n"); |
| 139 | ++        return AVERROR(EINVAL); |
| 140 | ++    } |
| 141 | ++ |
| 142 | ++    hr = func->MFCreateSample(&sample); |
| 143 | ++    if (FAILED(hr)) { |
| 144 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to create MF sample: %s\n", ff_hr_str(hr)); |
| 145 | ++        return AVERROR_EXTERNAL; |
| 146 | ++    } |
| 147 | ++ |
| 148 | ++    hr = func->MFCreateDXGISurfaceBuffer(&IID_ID3D11Texture2D, (IUnknown *)d3d11_texture, subIdx, 0, &buffer); |
| 149 | ++    if (FAILED(hr)) { |
| 150 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to create DXGI surface buffer: %s\n", ff_hr_str(hr)); |
| 151 | ++        IMFSample_Release(sample); |
| 152 | ++        return AVERROR_EXTERNAL; |
| 153 | ++    } |
| 154 | ++ |
| 155 | ++    hr = IMFSample_AddBuffer(sample, buffer); |
| 156 | ++    IMFMediaBuffer_Release(buffer); // our reference is dropped whether or not the add succeeded |
| 157 | ++    if (FAILED(hr)) { |
| 158 | ++        av_log(avctx, AV_LOG_ERROR, "Failed to add buffer to sample: %s\n", ff_hr_str(hr)); |
| 159 | ++        IMFSample_Release(sample); |
| 160 | ++        return AVERROR_EXTERNAL; |
| 161 | ++    } |
| 162 | ++    *out_sample = sample; |
| 163 | ++    return 0; |
| 164 | ++} |
| 165 | ++ |
| 166 | ++static int process_software_frame(AVCodecContext *avctx, const AVFrame *frame, IMFSample **out_sample) |
| 167 | ++{ |
| 168 | ++ MFContext *c = avctx->priv_data; |
| 169 | ++ IMFSample *sample = NULL; |
| 170 | ++ IMFMediaBuffer *buffer = NULL; |
| 171 | ++ BYTE *data = NULL; |
| 172 | ++ HRESULT hr; |
| 173 | ++ int size, ret; |
| 174 | + |
| 175 | + size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1); |
| 176 | + if (size < 0) |
| 177 | +- return NULL; |
| 178 | ++ return AVERROR_EXTERNAL; |
| 179 | + |
| 180 | + sample = ff_create_memory_sample(&c->functions, NULL, size, |
| 181 | + c->in_info.cbAlignment); |
| 182 | + if (!sample) |
| 183 | +- return NULL; |
| 184 | ++ return AVERROR_EXTERNAL; |
| 185 | + |
| 186 | + hr = IMFSample_GetBufferByIndex(sample, 0, &buffer); |
| 187 | + if (FAILED(hr)) { |
| 188 | + IMFSample_Release(sample); |
| 189 | +- return NULL; |
| 190 | ++ return AVERROR_EXTERNAL; |
| 191 | + } |
| 192 | + |
| 193 | + hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL); |
| 194 | + if (FAILED(hr)) { |
| 195 | + IMFMediaBuffer_Release(buffer); |
| 196 | + IMFSample_Release(sample); |
| 197 | +- return NULL; |
| 198 | ++ return AVERROR_EXTERNAL; |
| 199 | + } |
| 200 | + |
| 201 | + ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize, |
| 202 | +@@ -342,10 +434,43 @@ static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *f |
| 203 | + IMFMediaBuffer_Release(buffer); |
| 204 | + if (ret < 0) { |
| 205 | + IMFSample_Release(sample); |
| 206 | +- return NULL; |
| 207 | ++ return AVERROR_EXTERNAL; |
| 208 | + } |
| 209 | + |
| 210 | + IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->duration)); |
| 211 | ++ *out_sample = sample; |
| 212 | ++ |
| 213 | ++ return 0; |
| 214 | ++} |
| 215 | ++ |
| 216 | ++/* |
| 217 | ++ * Build the IMFSample that carries one video frame to the MFT. |
| 218 | ++ * AV_PIX_FMT_D3D11 frames are handled by process_d3d11_frame(), every other |
| 219 | ++ * format by process_software_frame(). |
| 220 | ++ * Returns the new sample, or NULL if it could not be created. |
| 221 | ++ */ |
| 222 | ++static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame) |
| 223 | ++{ |
| 224 | ++    IMFSample *sample = NULL; |
| 225 | ++    HRESULT hr; |
| 226 | ++    int ret; |
| 227 | ++ |
| 228 | ++    if (frame->format == AV_PIX_FMT_D3D11) { |
| 229 | ++        // Handle D3D11 hardware frames |
| 230 | ++        ret = process_d3d11_frame(avctx, frame, &sample); |
| 231 | ++    } else { |
| 232 | ++        // Handle software frames |
| 233 | ++        ret = process_software_frame(avctx, frame, &sample); |
| 234 | ++    } |
| 235 | ++    if (ret < 0) |
| 236 | ++        return NULL; |
| 237 | ++ |
| 238 | ++    // Set sample duration. A failure is only logged: the sample is still |
| 239 | ++    // returned to the caller. |
| 240 | ++    hr = IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->duration)); |
| 241 | ++    if (FAILED(hr)) { |
| 242 | ++        av_log(avctx, AV_LOG_WARNING, "Failed to set sample duration: %s\n", ff_hr_str(hr)); |
| 243 | ++    } |
| 244 | + |
| 245 | +     return sample; |
| 246 | + } |
| 247 | +@@ -511,6 +636,23 @@ static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) |
| 248 | + } |
| 249 | + } |
| 250 | + |
| 251 | ++ if(!c->stream_started) |
| 252 | ++ { |
| 253 | ++ HRESULT hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0); |
| 254 | ++ if (FAILED(hr)) { |
| 255 | ++ av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr)); |
| 256 | ++ return AVERROR(EBADMSG); |
| 257 | ++ } |
| 258 | ++ |
| 259 | ++ hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0); |
| 260 | ++ if (FAILED(hr)) { |
| 261 | ++ av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr)); |
| 262 | ++ return AVERROR(EBADMSG); |
| 263 | ++ } |
| 264 | ++ |
| 265 | ++ c->stream_started = 1; |
| 266 | ++ } |
| 267 | ++ |
| 268 | + ret = mf_send_sample(avctx, sample); |
| 269 | + if (sample) |
| 270 | + IMFSample_Release(sample); |
| 271 | +@@ -727,8 +869,16 @@ static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type) |
| 272 | + static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type) |
| 273 | + { |
| 274 | +     enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type); |
| 275 | +-    if (pix_fmt != avctx->pix_fmt) |
| 276 | +-        return -1; // can not use |
| 277 | ++    /* For D3D11 hardware input only an NV12 media type is accepted; any other |
| 278 | ++     * input must use exactly the encoder's pixel format. */ |
| 279 | ++    enum AVPixelFormat wanted = avctx->pix_fmt; |
| 280 | ++ |
| 281 | ++    if (avctx->pix_fmt == AV_PIX_FMT_D3D11) |
| 282 | ++        wanted = AV_PIX_FMT_NV12; |
| 283 | ++ |
| 284 | ++    /* -1 tells the caller this media type can not be used. */ |
| 285 | ++    if (pix_fmt != wanted) |
| 286 | ++        return -1; // can not use |
| 287 | + |
| 288 | +     return 0; |
| 289 | + } |
| 290 | +@@ -736,9 +886,16 @@ static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type) |
| 291 | + static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type) |
| 292 | + { |
| 293 | + enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type); |
| 294 | +- if (pix_fmt != avctx->pix_fmt) { |
| 295 | +- av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n"); |
| 296 | +- return AVERROR(EINVAL); |
| 297 | ++ if (avctx->pix_fmt == AV_PIX_FMT_D3D11) { |
| 298 | ++ if (pix_fmt != AV_PIX_FMT_NV12 && pix_fmt != AV_PIX_FMT_D3D11) { |
| 299 | ++ av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n"); |
| 300 | ++ return AVERROR(EINVAL); |
| 301 | ++ } |
| 302 | ++ } else { |
| 303 | ++ if (pix_fmt != avctx->pix_fmt) { |
| 304 | ++ av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n"); |
| 305 | ++ return AVERROR(EINVAL); |
| 306 | ++ } |
| 307 | + } |
| 308 | + |
| 309 | + //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height); |
| 310 | +@@ -1106,18 +1263,6 @@ static int mf_init_encoder(AVCodecContext *avctx) |
| 311 | + if ((ret = mf_setup_context(avctx)) < 0) |
| 312 | + return ret; |
| 313 | + |
| 314 | +- hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0); |
| 315 | +- if (FAILED(hr)) { |
| 316 | +- av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr)); |
| 317 | +- return AVERROR_EXTERNAL; |
| 318 | +- } |
| 319 | +- |
| 320 | +- hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0); |
| 321 | +- if (FAILED(hr)) { |
| 322 | +- av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr)); |
| 323 | +- return AVERROR_EXTERNAL; |
| 324 | +- } |
| 325 | +- |
| 326 | + if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events && |
| 327 | + c->is_video && !avctx->extradata) { |
| 328 | + int sleep = 10000, total = 0; |
| 329 | +@@ -1175,6 +1320,7 @@ static int mf_load_library(AVCodecContext *avctx) |
| 330 | + |
| 331 | + #if !HAVE_UWP |
| 332 | + c->library = dlopen("mfplat.dll", 0); |
| 333 | ++ c->d3d_dll = dlopen("D3D11.dll", 0); |
| 334 | + |
| 335 | + if (!c->library) { |
| 336 | + av_log(c, AV_LOG_ERROR, "DLL mfplat.dll failed to open\n"); |
| 337 | +@@ -1187,6 +1333,8 @@ static int mf_load_library(AVCodecContext *avctx) |
| 338 | + LOAD_MF_FUNCTION(c, MFCreateAlignedMemoryBuffer); |
| 339 | + LOAD_MF_FUNCTION(c, MFCreateSample); |
| 340 | + LOAD_MF_FUNCTION(c, MFCreateMediaType); |
| 341 | ++ LOAD_MF_FUNCTION(c, MFCreateDXGISurfaceBuffer); |
| 342 | ++ LOAD_MF_FUNCTION(c, MFCreateDXGIDeviceManager); |
| 343 | + // MFTEnumEx is missing in Windows Vista's mfplat.dll. |
| 344 | + LOAD_MF_FUNCTION(c, MFTEnumEx); |
| 345 | + |
| 346 | +@@ -1208,6 +1356,7 @@ static int mf_close(AVCodecContext *avctx) |
| 347 | + ff_free_mf(&c->functions, &c->mft); |
| 348 | + |
| 349 | + dlclose(c->library); |
| 350 | ++ dlclose(c->d3d_dll); |
| 351 | + c->library = NULL; |
| 352 | + #else |
| 353 | + ff_free_mf(&c->functions, &c->mft); |
| 354 | +@@ -1300,7 +1449,7 @@ static const FFCodecDefault defaults[] = { |
| 355 | + }; |
| 356 | + |
| 357 | + #define VFMTS \ |
| 358 | +- CODEC_PIXFMTS(AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P), |
| 359 | ++ CODEC_PIXFMTS(AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_D3D11), /* D3D11: hardware frames, handled by process_d3d11_frame() */ |
| 360 | + #define VCAPS \ |
| 361 | + .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID | \ |
| 362 | + AV_CODEC_CAP_DR1, |
| 363 | +-- |
| 364 | +2.43.0.windows.1 |
| 365 | + |
0 commit comments