|
| 1 | +diff --git a/source/common/ringmem.cpp b/source/common/ringmem.cpp |
| 2 | +index cfd44e83d..14ec3d316 100644 |
| 3 | +--- a/source/common/ringmem.cpp |
| 4 | ++++ b/source/common/ringmem.cpp |
| 5 | +@@ -135,6 +135,19 @@ namespace X265_NS { |
| 6 | + int32_t shrMemSize = (itemSize * itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1); |
| 7 | + |
| 8 | + #ifdef _WIN32 |
| 9 | ++# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PC_APP || WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP) |
| 10 | ++ WCHAR wnameBuf[MAX_PATH]; |
| 11 | ++ if (MultiByteToWideChar(CP_UTF8, 0, nameBuf, -1, wnameBuf, MAX_PATH) == 0) { |
| 12 | ++ return false; |
| 13 | ++ } |
| 14 | ++ HANDLE h = OpenFileMappingFromApp(FILE_MAP_WRITE | FILE_MAP_READ, FALSE, wnameBuf); |
| 15 | ++ if (!h) |
| 16 | ++ { |
| 17 | ++ return false; |
| 18 | ++ } |
| 19 | ++ |
| 20 | ++ void* pool = MapViewOfFile3FromApp(h, GetCurrentProcess(), NULL, 0, 0, 0, PAGE_READWRITE, NULL, 0); |
| 21 | ++# else |
| 22 | + HANDLE h = OpenFileMappingA(FILE_MAP_WRITE | FILE_MAP_READ, FALSE, nameBuf); |
| 23 | + if (!h) |
| 24 | + { |
| 25 | +@@ -149,6 +162,7 @@ namespace X265_NS { |
| 26 | + } |
| 27 | + |
| 28 | + void *pool = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0); |
| 29 | ++# endif |
| 30 | + |
| 31 | + ///< should not close the handle here, otherwise the OpenFileMapping would fail |
| 32 | + //CloseHandle(h); |
| 33 | +diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp |
| 34 | +index 9c27be783..5dde29e34 100644 |
| 35 | +--- a/source/common/threadpool.cpp |
| 36 | ++++ b/source/common/threadpool.cpp |
| 37 | +@@ -71,7 +71,7 @@ |
| 38 | + # define strcasecmp _stricmp |
| 39 | + #endif |
| 40 | + |
| 41 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 42 | ++#ifdef USE_WIN32_AFFINITY |
| 43 | + const uint64_t m1 = 0x5555555555555555; //binary: 0101... |
| 44 | + const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. |
| 45 | + const uint64_t m3 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... |
| 46 | +@@ -262,14 +262,14 @@ ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools, bool isTh |
| 47 | + int numNumaNodes = X265_MIN(getNumaNodeCount(), MAX_NODE_NUM); |
| 48 | + bool bNumaSupport = false; |
| 49 | + |
| 50 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 51 | ++#ifdef USE_WIN32_AFFINITY |
| 52 | + bNumaSupport = true; |
| 53 | + #elif HAVE_LIBNUMA |
| 54 | + bNumaSupport = numa_available() >= 0; |
| 55 | + #endif |
| 56 | + |
| 57 | + |
| 58 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 59 | ++#ifdef USE_WIN32_AFFINITY |
| 60 | + PGROUP_AFFINITY groupAffinityPointer = new GROUP_AFFINITY; |
| 61 | + for (int i = 0; i < numNumaNodes; i++) |
| 62 | + { |
| 63 | +@@ -475,7 +475,7 @@ bool ThreadPool::create(int numThreads, int maxProviders, uint64_t nodeMask) |
| 64 | + { |
| 65 | + X265_CHECK(numThreads <= MAX_POOL_THREADS, "a single thread pool cannot have more than MAX_POOL_THREADS threads\n"); |
| 66 | + |
| 67 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 68 | ++#ifdef USE_WIN32_AFFINITY |
| 69 | + memset(&m_groupAffinity, 0, sizeof(GROUP_AFFINITY)); |
| 70 | + for (int i = 0; i < getNumaNodeCount(); i++) |
| 71 | + { |
| 72 | +@@ -568,7 +568,7 @@ void ThreadPool::setCurrentThreadAffinity() |
| 73 | + |
| 74 | + void ThreadPool::setThreadNodeAffinity(void *numaMask) |
| 75 | + { |
| 76 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 77 | ++#ifdef USE_WIN32_AFFINITY |
| 78 | + UNREFERENCED_PARAMETER(numaMask); |
| 79 | + GROUP_AFFINITY groupAffinity; |
| 80 | + memset(&groupAffinity, 0, sizeof(GROUP_AFFINITY)); |
| 81 | +@@ -597,7 +597,7 @@ void ThreadPool::setThreadNodeAffinity(void *numaMask) |
| 82 | + /* static */ |
| 83 | + int ThreadPool::getNumaNodeCount() |
| 84 | + { |
| 85 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 86 | ++#ifdef USE_WIN32_AFFINITY |
| 87 | + ULONG num = 1; |
| 88 | + if (GetNumaHighestNodeNumber(&num)) |
| 89 | + num++; |
| 90 | +@@ -615,7 +615,7 @@ int ThreadPool::getNumaNodeCount() |
| 91 | + /* static */ |
| 92 | + int ThreadPool::getCpuCount() |
| 93 | + { |
| 94 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 95 | ++#ifdef USE_WIN32_AFFINITY |
| 96 | + enum { MAX_NODE_NUM = 127 }; |
| 97 | + int cpus = 0; |
| 98 | + int numNumaNodes = X265_MIN(getNumaNodeCount(), MAX_NODE_NUM); |
| 99 | +diff --git a/source/common/threadpool.h b/source/common/threadpool.h |
| 100 | +index 867539f3a..8c92090f2 100644 |
| 101 | +--- a/source/common/threadpool.h |
| 102 | ++++ b/source/common/threadpool.h |
| 103 | +@@ -44,6 +44,14 @@ static const sleepbitmap_t ALL_POOL_THREADS = (sleepbitmap_t)-1; |
| 104 | + enum { MAX_POOL_THREADS = sizeof(sleepbitmap_t) * 8 }; |
| 105 | + enum { INVALID_SLICE_PRIORITY = 10 }; // a value larger than any X265_TYPE_* macro |
| 106 | + |
| 107 | ++#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 108 | ++# include <winapifamily.h> |
| 109 | ++# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) |
| 110 | ++// GetNumaNodeProcessorMaskEx is not available in UWP apps |
| 111 | ++# define USE_WIN32_AFFINITY 1 |
| 112 | ++# endif |
| 113 | ++#endif |
| 114 | ++ |
| 115 | + // Frame level job providers. FrameEncoder and Lookahead derive from |
| 116 | + // this class and implement findJob() |
| 117 | + class JobProvider |
| 118 | +@@ -84,7 +92,7 @@ public: |
| 119 | + int m_numProviders; |
| 120 | + int m_numWorkers; |
| 121 | + void* m_numaMask; // node mask in linux, cpu mask in windows |
| 122 | +-#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 |
| 123 | ++#ifdef USE_WIN32_AFFINITY |
| 124 | + GROUP_AFFINITY m_groupAffinity; |
| 125 | + #endif |
| 126 | + bool m_isActive; |
| 127 | +diff --git a/source/common/x86/h-ipfilter8.asm b/source/common/x86/h-ipfilter8.asm |
| 128 | +index 757efc509..53d7f2d3c 100644 |
| 129 | +--- a/source/common/x86/h-ipfilter8.asm |
| 130 | ++++ b/source/common/x86/h-ipfilter8.asm |
| 131 | +@@ -125,9 +125,6 @@ const pb_8tap_hps_0, times 2 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 |
| 132 | + ALIGN 32 |
| 133 | + interp4_hps_shuf: times 2 db 0, 1, 2, 3, 1, 2, 3, 4, 8, 9, 10, 11, 9, 10, 11, 12 |
| 134 | + |
| 135 | +-ALIGN 32 |
| 136 | +-const interp_4tap_8x8_horiz_shuf, dd 0, 4, 1, 5, 2, 6, 3, 7 |
| 137 | +- |
| 138 | + SECTION .text |
| 139 | + |
| 140 | + cextern pw_1 |
| 141 | +@@ -1462,6 +1459,8 @@ cglobal interp_4tap_horiz_pp_4x32, 4, 6, 5, src, srcstride, dst, dststride |
| 142 | + |
| 143 | + RET |
| 144 | + |
| 145 | ++ALIGN 32 |
| 146 | ++const interp_4tap_8x8_horiz_shuf, dd 0, 4, 1, 5, 2, 6, 3, 7 |
| 147 | + |
| 148 | + %macro FILTER_H4_w6 3 |
| 149 | + movu %1, [srcq - 1] |
0 commit comments