| // Bench.cpp |
| |
| #include "StdAfx.h" |
| |
| #include "../../../../C/CpuArch.h" |
| |
| // #include <stdio.h> |
| |
| #ifndef _WIN32 |
| |
| #define USE_POSIX_TIME |
| #define USE_POSIX_TIME2 |
| #endif // _WIN32 |
| |
| #ifdef USE_POSIX_TIME |
| #include <time.h> |
| #include <unistd.h> |
| #ifdef USE_POSIX_TIME2 |
| #include <sys/time.h> |
| #include <sys/times.h> |
| #endif |
| #endif // USE_POSIX_TIME |
| |
| #ifdef _WIN32 |
| #define USE_ALLOCA |
| #endif |
| |
| #ifdef USE_ALLOCA |
| #ifdef _WIN32 |
| #include <malloc.h> |
| #else |
| #include <stdlib.h> |
| #endif |
| #endif |
| |
| #include "../../../../C/7zCrc.h" |
| #include "../../../../C/RotateDefs.h" |
| |
| #ifndef Z7_ST |
| #include "../../../Windows/Synchronization.h" |
| #include "../../../Windows/Thread.h" |
| #endif |
| |
| #include "../../../Windows/FileFind.h" |
| #include "../../../Windows/FileIO.h" |
| #include "../../../Windows/SystemInfo.h" |
| |
| #include "../../../Common/MyBuffer2.h" |
| #include "../../../Common/IntToString.h" |
| #include "../../../Common/StringConvert.h" |
| #include "../../../Common/StringToInt.h" |
| #include "../../../Common/Wildcard.h" |
| |
| #include "../../Common/MethodProps.h" |
| #include "../../Common/StreamObjects.h" |
| #include "../../Common/StreamUtils.h" |
| |
| #include "Bench.h" |
| |
| using namespace NWindows; |
| |
| #ifndef Z7_ST |
| static const UInt32 k_LZMA = 0x030101; |
| #endif |
| |
| static const UInt64 kComplexInCommands = (UInt64)1 << |
| #ifdef UNDER_CE |
| 31; |
| #else |
| 34; |
| #endif |
| |
| static const UInt32 kComplexInMs = 4000; |
| |
| static void SetComplexCommandsMs(UInt32 complexInMs, |
| bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands) |
| { |
| complexInCommands = kComplexInCommands; |
| const UInt64 kMinFreq = (UInt64)1000000 * 4; |
| const UInt64 kMaxFreq = (UInt64)1000000 * 20000; |
| if (cpuFreq < kMinFreq && !isSpecifiedFreq) |
| cpuFreq = kMinFreq; |
| if (cpuFreq < kMaxFreq || isSpecifiedFreq) |
| { |
| if (complexInMs != 0) |
| complexInCommands = complexInMs * cpuFreq / 1000; |
| else |
| complexInCommands = cpuFreq >> 2; |
| } |
| } |
| |
| // const UInt64 kBenchmarkUsageMult = 1000000; // for debug |
| static const unsigned kBenchmarkUsageMultBits = 16; |
| static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits; |
| |
| UInt64 Benchmark_GetUsage_Percents(UInt64 usage) |
| { |
| return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult; |
| } |
| |
| static const unsigned kNumHashDictBits = 17; |
| static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test |
| |
| static const unsigned kOldLzmaDictBits = 32; |
| |
| // static const size_t kAdditionalSize = (size_t)1 << 32; // for debug |
| static const size_t kAdditionalSize = (size_t)1 << 16; |
| static const UInt32 kCompressedAdditionalSize = (1 << 10); |
| |
| static const UInt32 kMaxMethodPropSize = (1 << 6); |
| |
| |
/* Calls (_buffer_)->Alloc(_size_) and makes the ENCLOSING function return
   E_OUTOFMEMORY if a non-zero size was requested but the buffer reports that
   it is not allocated.
   The macro expands to a braced block, so it is used without a trailing
   semicolon. _size_ is evaluated twice; it is parenthesized in the condition
   so that an expression argument can not regroup with the && operator. */
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
    if ((_size_) && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
| |
| |
| class CBaseRandomGenerator |
| { |
| UInt32 A1; |
| UInt32 A2; |
| UInt32 Salt; |
| public: |
| CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); } |
| void Init() { A1 = 362436069; A2 = 521288629;} |
| Z7_FORCE_INLINE |
| UInt32 GetRnd() |
| { |
| return Salt ^ |
| ( |
| ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + |
| ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ) |
| ); |
| } |
| }; |
| |
| |
| Z7_NO_INLINE |
| static void RandGen(Byte *buf, size_t size) |
| { |
| CBaseRandomGenerator RG; |
| const size_t size4 = size & ~((size_t)3); |
| size_t i; |
| for (i = 0; i < size4; i += 4) |
| { |
| const UInt32 v = RG.GetRnd(); |
| SetUi32(buf + i, v) |
| } |
| UInt32 v = RG.GetRnd(); |
| for (; i < size; i++) |
| { |
| buf[i] = (Byte)v; |
| v >>= 8; |
| } |
| } |
| |
| |
| class CBenchRandomGenerator: public CMidAlignedBuffer |
| { |
| static UInt32 GetVal(UInt32 &res, unsigned numBits) |
| { |
| UInt32 val = res & (((UInt32)1 << numBits) - 1); |
| res >>= numBits; |
| return val; |
| } |
| |
| static UInt32 GetLen(UInt32 &r) |
| { |
| UInt32 len = GetVal(r, 2); |
| return GetVal(r, 1 + len); |
| } |
| |
| public: |
| |
| void GenerateSimpleRandom(UInt32 salt) |
| { |
| CBaseRandomGenerator rg(salt); |
| const size_t bufSize = Size(); |
| Byte *buf = (Byte *)*this; |
| for (size_t i = 0; i < bufSize; i++) |
| buf[i] = (Byte)rg.GetRnd(); |
| } |
| |
| void GenerateLz(unsigned dictBits, UInt32 salt) |
| { |
| CBaseRandomGenerator rg(salt); |
| size_t pos = 0; |
| size_t rep0 = 1; |
| const size_t bufSize = Size(); |
| Byte *buf = (Byte *)*this; |
| unsigned posBits = 1; |
| |
| // printf("\n dictBits = %d\n", (UInt32)dictBits); |
| // printf("\n bufSize = 0x%p\n", (const void *)bufSize); |
| |
| while (pos < bufSize) |
| { |
| /* |
| if (pos >= ((UInt32)1 << 31)) |
| printf(" %x\n", pos); |
| */ |
| UInt32 r = rg.GetRnd(); |
| if (GetVal(r, 1) == 0 || pos < 1024) |
| buf[pos++] = (Byte)(r & 0xFF); |
| else |
| { |
| UInt32 len; |
| len = 1 + GetLen(r); |
| |
| if (GetVal(r, 3) != 0) |
| { |
| len += GetLen(r); |
| |
| while (((size_t)1 << posBits) < pos) |
| posBits++; |
| |
| unsigned numBitsMax = dictBits; |
| if (numBitsMax > posBits) |
| numBitsMax = posBits; |
| |
| const unsigned kAddBits = 6; |
| unsigned numLogBits = 5; |
| if (numBitsMax <= (1 << 4) - 1 + kAddBits) |
| numLogBits = 4; |
| |
| for (;;) |
| { |
| const UInt32 ppp = GetVal(r, numLogBits) + kAddBits; |
| r = rg.GetRnd(); |
| if (ppp > numBitsMax) |
| continue; |
| // rep0 = GetVal(r, ppp); |
| rep0 = r & (((size_t)1 << ppp) - 1); |
| if (rep0 < pos) |
| break; |
| r = rg.GetRnd(); |
| } |
| rep0++; |
| } |
| |
| // len *= 300; // for debug |
| { |
| const size_t rem = bufSize - pos; |
| if (len > rem) |
| len = (UInt32)rem; |
| } |
| Byte *dest = buf + pos; |
| const Byte *src = dest - rep0; |
| pos += len; |
| for (UInt32 i = 0; i < len; i++) |
| *dest++ = *src++; |
| } |
| } |
| // printf("\n CRC = %x\n", CrcCalc(buf, bufSize)); |
| } |
| }; |
| |
| |
| Z7_CLASS_IMP_NOQIB_1( |
| CBenchmarkInStream |
| , ISequentialInStream |
| ) |
| const Byte *Data; |
| size_t Pos; |
| size_t Size; |
| public: |
| void Init(const Byte *data, size_t size) |
| { |
| Data = data; |
| Size = size; |
| Pos = 0; |
| } |
| bool WasFinished() const { return Pos == Size; } |
| }; |
| |
| Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)) |
| { |
| const UInt32 kMaxBlockSize = (1 << 20); |
| if (size > kMaxBlockSize) |
| size = kMaxBlockSize; |
| const size_t remain = Size - Pos; |
| if (size > remain) |
| size = (UInt32)remain; |
| |
| if (size != 0) |
| memcpy(data, Data + Pos, size); |
| |
| Pos += size; |
| if (processedSize) |
| *processedSize = size; |
| return S_OK; |
| } |
| |
| |
| class CBenchmarkOutStream Z7_final: |
| public ISequentialOutStream, |
| public CMyUnknownImp, |
| public CMidAlignedBuffer |
| { |
| Z7_COM_UNKNOWN_IMP_0 |
| Z7_IFACE_COM7_IMP(ISequentialOutStream) |
| // bool _overflow; |
| public: |
| size_t Pos; |
| bool RealCopy; |
| bool CalcCrc; |
| UInt32 Crc; |
| |
| // CBenchmarkOutStream(): _overflow(false) {} |
| void Init(bool realCopy, bool calcCrc) |
| { |
| Crc = CRC_INIT_VAL; |
| RealCopy = realCopy; |
| CalcCrc = calcCrc; |
| // _overflow = false; |
| Pos = 0; |
| } |
| |
| void InitCrc() |
| { |
| Crc = CRC_INIT_VAL; |
| } |
| |
| void Calc(const void *data, size_t size) |
| { |
| Crc = CrcUpdate(Crc, data, size); |
| } |
| |
| size_t GetPos() const { return Pos; } |
| |
| // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); } |
| }; |
| |
| Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) |
| { |
| size_t curSize = Size() - Pos; |
| if (curSize > size) |
| curSize = size; |
| if (curSize != 0) |
| { |
| if (RealCopy) |
| memcpy(((Byte *)*this) + Pos, data, curSize); |
| if (CalcCrc) |
| Calc(data, curSize); |
| Pos += curSize; |
| } |
| if (processedSize) |
| *processedSize = (UInt32)curSize; |
| if (curSize != size) |
| { |
| // _overflow = true; |
| return E_FAIL; |
| } |
| return S_OK; |
| } |
| |
| |
| Z7_CLASS_IMP_NOQIB_1( |
| CCrcOutStream |
| , ISequentialOutStream |
| ) |
| public: |
| bool CalcCrc; |
| UInt32 Crc; |
| UInt64 Pos; |
| |
| CCrcOutStream(): CalcCrc(true) {} |
| void Init() { Crc = CRC_INIT_VAL; Pos = 0; } |
| void Calc(const void *data, size_t size) |
| { |
| Crc = CrcUpdate(Crc, data, size); |
| } |
| }; |
| |
| Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) |
| { |
| if (CalcCrc) |
| Calc(data, size); |
| Pos += size; |
| if (processedSize) |
| *processedSize = size; |
| return S_OK; |
| } |
| |
| // #include "../../../../C/My_sys_time.h" |
| |
| static UInt64 GetTimeCount() |
| { |
| #ifdef USE_POSIX_TIME |
| #ifdef USE_POSIX_TIME2 |
| timeval v; |
| if (gettimeofday(&v, NULL) == 0) |
| return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec; |
| return (UInt64)time(NULL) * 1000000; |
| #else |
| return time(NULL); |
| #endif |
| #else |
| LARGE_INTEGER value; |
| if (::QueryPerformanceCounter(&value)) |
| return (UInt64)value.QuadPart; |
| return GetTickCount(); |
| #endif |
| } |
| |
| static UInt64 GetFreq() |
| { |
| #ifdef USE_POSIX_TIME |
| #ifdef USE_POSIX_TIME2 |
| return 1000000; |
| #else |
| return 1; |
| #endif |
| #else |
| LARGE_INTEGER value; |
| if (::QueryPerformanceFrequency(&value)) |
| return (UInt64)value.QuadPart; |
| return 1000; |
| #endif |
| } |
| |
| |
| #ifdef USE_POSIX_TIME |
| |
| struct CUserTime |
| { |
| UInt64 Sum; |
| clock_t Prev; |
| |
| void Init() |
| { |
| // Prev = clock(); |
| Sum = 0; |
| Prev = 0; |
| Update(); |
| Sum = 0; |
| } |
| |
| void Update() |
| { |
| tms t; |
| /* clock_t res = */ times(&t); |
| clock_t newVal = t.tms_utime + t.tms_stime; |
| Sum += (UInt64)(newVal - Prev); |
| Prev = newVal; |
| |
| /* |
| clock_t v = clock(); |
| if (v != -1) |
| { |
| Sum += v - Prev; |
| Prev = v; |
| } |
| */ |
| } |
| UInt64 GetUserTime() |
| { |
| Update(); |
| return Sum; |
| } |
| }; |
| |
| #else |
| |
| |
| struct CUserTime |
| { |
| bool UseTick; |
| DWORD Prev_Tick; |
| UInt64 Prev; |
| UInt64 Sum; |
| |
| void Init() |
| { |
| UseTick = false; |
| Prev_Tick = 0; |
| Prev = 0; |
| Sum = 0; |
| Update(); |
| Sum = 0; |
| } |
| UInt64 GetUserTime() |
| { |
| Update(); |
| return Sum; |
| } |
| void Update(); |
| }; |
| |
| static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } |
| |
| void CUserTime::Update() |
| { |
| DWORD new_Tick = GetTickCount(); |
| FILETIME creationTime, exitTime, kernelTime, userTime; |
| if (!UseTick && |
| #ifdef UNDER_CE |
| ::GetThreadTimes(::GetCurrentThread() |
| #else |
| ::GetProcessTimes(::GetCurrentProcess() |
| #endif |
| , &creationTime, &exitTime, &kernelTime, &userTime)) |
| { |
| UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime); |
| Sum += newVal - Prev; |
| Prev = newVal; |
| } |
| else |
| { |
| UseTick = true; |
| Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000; |
| } |
| Prev_Tick = new_Tick; |
| } |
| |
| |
| #endif |
| |
| static UInt64 GetUserFreq() |
| { |
| #ifdef USE_POSIX_TIME |
| // return CLOCKS_PER_SEC; |
| return (UInt64)sysconf(_SC_CLK_TCK); |
| #else |
| return 10000000; |
| #endif |
| } |
| |
| class CBenchProgressStatus Z7_final |
| { |
| #ifndef Z7_ST |
| NSynchronization::CCriticalSection CS; |
| #endif |
| public: |
| HRESULT Res; |
| bool EncodeMode; |
| void SetResult(HRESULT res) |
| { |
| #ifndef Z7_ST |
| NSynchronization::CCriticalSectionLock lock(CS); |
| #endif |
| Res = res; |
| } |
| HRESULT GetResult() |
| { |
| #ifndef Z7_ST |
| NSynchronization::CCriticalSectionLock lock(CS); |
| #endif |
| return Res; |
| } |
| }; |
| |
| struct CBenchInfoCalc |
| { |
| CBenchInfo BenchInfo; |
| CUserTime UserTime; |
| |
| void SetStartTime(); |
| void SetFinishTime(CBenchInfo &dest); |
| }; |
| |
| void CBenchInfoCalc::SetStartTime() |
| { |
| BenchInfo.GlobalFreq = GetFreq(); |
| BenchInfo.UserFreq = GetUserFreq(); |
| BenchInfo.GlobalTime = ::GetTimeCount(); |
| BenchInfo.UserTime = 0; |
| UserTime.Init(); |
| } |
| |
| void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest) |
| { |
| dest = BenchInfo; |
| dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime; |
| dest.UserTime = UserTime.GetUserTime(); |
| } |
| |
| class CBenchProgressInfo Z7_final: |
| public ICompressProgressInfo, |
| public CMyUnknownImp, |
| public CBenchInfoCalc |
| { |
| Z7_COM_UNKNOWN_IMP_0 |
| Z7_IFACE_COM7_IMP(ICompressProgressInfo) |
| public: |
| CBenchProgressStatus *Status; |
| IBenchCallback *Callback; |
| |
| CBenchProgressInfo(): Callback(NULL) {} |
| }; |
| |
| |
| Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)) |
| { |
| HRESULT res = Status->GetResult(); |
| if (res != S_OK) |
| return res; |
| if (!Callback) |
| return res; |
| |
| /* |
| static UInt64 inSizePrev = 0; |
| static UInt64 outSizePrev = 0; |
| UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0; |
| if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; } |
| if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; } |
| UInt64 percents = delta2 * 1000; |
| if (delta1 != 0) |
| percents /= delta1; |
| printf("=== %7d %7d %7d %7d ratio = %4d\n", |
| (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10), |
| (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10), |
| (unsigned)percents); |
| */ |
| |
| CBenchInfo info; |
| SetFinishTime(info); |
| if (Status->EncodeMode) |
| { |
| info.UnpackSize = BenchInfo.UnpackSize + *inSize; |
| info.PackSize = BenchInfo.PackSize + *outSize; |
| res = Callback->SetEncodeResult(info, false); |
| } |
| else |
| { |
| info.PackSize = BenchInfo.PackSize + *inSize; |
| info.UnpackSize = BenchInfo.UnpackSize + *outSize; |
| res = Callback->SetDecodeResult(info, false); |
| } |
| if (res != S_OK) |
| Status->SetResult(res); |
| return res; |
| } |
| |
| static const unsigned kSubBits = 8; |
| |
| static unsigned GetLogSize(UInt64 size) |
| { |
| unsigned i = 0; |
| for (;;) |
| { |
| i++; size >>= 1; if (size == 0) break; |
| } |
| return i; |
| } |
| |
| |
| static UInt32 GetLogSize_Sub(UInt64 size) |
| { |
| if (size <= 1) |
| return 0; |
| const unsigned i = GetLogSize(size) - 1; |
| UInt32 v; |
| if (i <= kSubBits) |
| v = (UInt32)(size) << (kSubBits - i); |
| else |
| v = (UInt32)(size >> (i - kSubBits)); |
| return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1)); |
| } |
| |
| |
| static UInt64 Get_UInt64_from_double(double v) |
| { |
| const UInt64 kMaxVal = (UInt64)1 << 62; |
| if (v > (double)(Int64)kMaxVal) |
| return kMaxVal; |
| return (UInt64)v; |
| } |
| |
| static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d) |
| { |
| if (d == 0) |
| d = 1; |
| const double v = |
| (double)(Int64)m1 * |
| (double)(Int64)m2 / |
| (double)(Int64)d; |
| return Get_UInt64_from_double(v); |
| /* |
| unsigned n1 = GetLogSize(m1); |
| unsigned n2 = GetLogSize(m2); |
| while (n1 + n2 > 64) |
| { |
| if (n1 >= n2) |
| { |
| m1 >>= 1; |
| n1--; |
| } |
| else |
| { |
| m2 >>= 1; |
| n2--; |
| } |
| d >>= 1; |
| } |
| |
| if (d == 0) |
| d = 1; |
| return m1 * m2 / d; |
| */ |
| } |
| |
| |
| UInt64 CBenchInfo::GetUsage() const |
| { |
| UInt64 userTime = UserTime; |
| UInt64 userFreq = UserFreq; |
| UInt64 globalTime = GlobalTime; |
| UInt64 globalFreq = GlobalFreq; |
| |
| if (userFreq == 0) |
| userFreq = 1; |
| if (globalTime == 0) |
| globalTime = 1; |
| |
| const double v = |
| ((double)(Int64)userTime / (double)(Int64)userFreq) |
| * ((double)(Int64)globalFreq / (double)(Int64)globalTime) |
| * (double)(Int64)kBenchmarkUsageMult; |
| return Get_UInt64_from_double(v); |
| /* |
| return MyMultDiv64( |
| MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq), |
| globalFreq, globalTime); |
| */ |
| } |
| |
| |
| UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const |
| { |
| if (UserTime == 0) |
| { |
| return 0; |
| // userTime = 1; |
| } |
| UInt64 globalFreq = GlobalFreq; |
| if (globalFreq == 0) |
| globalFreq = 1; |
| |
| const double v = |
| ((double)(Int64)GlobalTime / (double)(Int64)globalFreq) |
| * ((double)(Int64)UserFreq / (double)(Int64)UserTime) |
| * (double)(Int64)rating; |
| return Get_UInt64_from_double(v); |
| /* |
| return MyMultDiv64( |
| MyMultDiv64(rating, UserFreq, UserTime), |
| GlobalTime, globalFreq); |
| */ |
| } |
| |
| |
| UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const |
| { |
| return MyMultDiv64(numUnits, GlobalFreq, GlobalTime); |
| } |
| |
| static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity) |
| { |
| return complexity >= 0 ? |
| size * (UInt32)complexity : |
| size / (UInt32)(-complexity); |
| } |
| |
| struct CBenchProps |
| { |
| bool LzmaRatingMode; |
| |
| Int32 EncComplex; |
| Int32 DecComplexCompr; |
| Int32 DecComplexUnc; |
| |
| unsigned KeySize; |
| |
| CBenchProps(): |
| LzmaRatingMode(false), |
| KeySize(0) |
| {} |
| |
| void SetLzmaCompexity(); |
| |
| UInt64 GetNumCommands_Enc(UInt64 unpackSize) const |
| { |
| const UInt32 kMinSize = 100; |
| if (unpackSize < kMinSize) |
| unpackSize = kMinSize; |
| return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex); |
| } |
| |
| UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const |
| { |
| return |
| GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) + |
| GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc); |
| } |
| |
| UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const; |
| UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const; |
| }; |
| |
| void CBenchProps::SetLzmaCompexity() |
| { |
| EncComplex = 1200; |
| DecComplexUnc = 4; |
| DecComplexCompr = 190; |
| LzmaRatingMode = true; |
| } |
| |
| UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const |
| { |
| if (dictSize < (1 << kBenchMinDicLogSize)) |
| dictSize = (1 << kBenchMinDicLogSize); |
| Int32 encComplex = EncComplex; |
| if (LzmaRatingMode) |
| { |
| /* |
| for (UInt64 uu = 0; uu < (UInt64)0xf << 60;) |
| { |
| unsigned rr = GetLogSize_Sub(uu); |
| printf("\n%16I64x , log = %4x", uu, rr); |
| uu += 1; |
| uu += uu / 50; |
| } |
| */ |
| // throw 1; |
| const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits); |
| encComplex = 870 + ((t * t * 5) >> (2 * kSubBits)); |
| } |
| const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex); |
| return MyMultDiv64(numCommands, freq, elapsedTime); |
| } |
| |
| UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const |
| { |
| const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations; |
| return MyMultDiv64(numCommands, freq, elapsedTime); |
| } |
| |
| |
| |
| UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const |
| { |
| CBenchProps props; |
| props.SetLzmaCompexity(); |
| return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations); |
| } |
| |
| UInt64 CBenchInfo::GetRating_LzmaDec() const |
| { |
| CBenchProps props; |
| props.SetLzmaCompexity(); |
| return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations); |
| } |
| |
| |
| #ifndef Z7_ST |
| |
| #define NUM_CPU_LEVELS_MAX 3 |
| |
| struct CAffinityMode |
| { |
| unsigned NumBundleThreads; |
| unsigned NumLevels; |
| unsigned NumCoreThreads; |
| unsigned NumCores; |
| // unsigned DivideNum; |
| UInt32 Sizes[NUM_CPU_LEVELS_MAX]; |
| |
| void SetLevels(unsigned numCores, unsigned numCoreThreads); |
| DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const; |
| bool NeedAffinity() const { return NumBundleThreads != 0; } |
| |
| WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const |
| { |
| if (NeedAffinity()) |
| { |
| CCpuSet cpuSet; |
| GetAffinityMask(bundleIndex, &cpuSet); |
| return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet); |
| } |
| return thread.Create(startAddress, parameter); |
| } |
| |
| CAffinityMode(): |
| NumBundleThreads(0), |
| NumLevels(0), |
| NumCoreThreads(1) |
| // DivideNum(1) |
| {} |
| }; |
| |
| void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads) |
| { |
| NumCores = numCores; |
| NumCoreThreads = numCoreThreads; |
| NumLevels = 0; |
| if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0) |
| return; |
| UInt32 c = numCores / numCoreThreads; |
| UInt32 c2 = 1; |
| while ((c & 1) == 0) |
| { |
| c >>= 1; |
| c2 <<= 1; |
| } |
| if (c2 != 1) |
| Sizes[NumLevels++] = c2; |
| if (c != 1) |
| Sizes[NumLevels++] = c; |
| if (numCoreThreads != 1) |
| Sizes[NumLevels++] = numCoreThreads; |
| if (NumLevels == 0) |
| Sizes[NumLevels++] = 1; |
| |
| /* |
| printf("\n Cores:"); |
| for (unsigned i = 0; i < NumLevels; i++) |
| { |
| printf(" %d", Sizes[i]); |
| } |
| printf("\n"); |
| */ |
| } |
| |
| |
| DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const |
| { |
| CpuSet_Zero(cpuSet); |
| |
| if (NumLevels == 0) |
| return 0; |
| |
| // printf("\n%2d", bundleIndex); |
| |
| /* |
| UInt32 low = 0; |
| if (DivideNum != 1) |
| { |
| low = bundleIndex % DivideNum; |
| bundleIndex /= DivideNum; |
| } |
| */ |
| |
| UInt32 numGroups = NumCores / NumBundleThreads; |
| UInt32 m = bundleIndex % numGroups; |
| UInt32 v = 0; |
| for (unsigned i = 0; i < NumLevels; i++) |
| { |
| UInt32 size = Sizes[i]; |
| while ((size & 1) == 0) |
| { |
| v *= 2; |
| v |= (m & 1); |
| m >>= 1; |
| size >>= 1; |
| } |
| v *= size; |
| v += m % size; |
| m /= size; |
| } |
| |
| // UInt32 nb = NumBundleThreads / DivideNum; |
| UInt32 nb = NumBundleThreads; |
| |
| DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1; |
| // v += low; |
| mask <<= v; |
| |
| // printf(" %2d %8x \n ", v, (unsigned)mask); |
| #ifdef _WIN32 |
| *cpuSet = mask; |
| #else |
| { |
| for (unsigned k = 0; k < nb; k++) |
| CpuSet_Set(cpuSet, v + k); |
| } |
| #endif |
| |
| return mask; |
| } |
| |
| |
| struct CBenchSyncCommon |
| { |
| bool ExitMode; |
| NSynchronization::CManualResetEvent StartEvent; |
| |
| CBenchSyncCommon(): ExitMode(false) {} |
| }; |
| |
| #endif |
| |
| |
| |
// CRC check policy (used for CEncoderInfo::CheckCrcMode_Dec).
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never     = 0,
  k_CheckCrcMode_Always    = 1,
  k_CheckCrcMode_FirstPass = 2
};
| |
| class CEncoderInfo; |
| |
| class CEncoderInfo Z7_final |
| { |
| Z7_CLASS_NO_COPY(CEncoderInfo) |
| |
| public: |
| |
| #ifndef Z7_ST |
| NWindows::CThread thread[2]; |
| NSynchronization::CManualResetEvent ReadyEvent; |
| UInt32 NumDecoderSubThreads; |
| CBenchSyncCommon *Common; |
| UInt32 EncoderIndex; |
| UInt32 NumEncoderInternalThreads; |
| CAffinityMode AffinityMode; |
| bool IsGlobalMtMode; // if more than one benchmark encoder threads |
| #endif |
| |
| CMyComPtr<ICompressCoder> _encoder; |
| CMyComPtr<ICompressFilter> _encoderFilter; |
| CBenchProgressInfo *progressInfoSpec[2]; |
| CMyComPtr<ICompressProgressInfo> progressInfo[2]; |
| UInt64 NumIterations; |
| |
| UInt32 Salt; |
| |
| #ifdef USE_ALLOCA |
| size_t AllocaSize; |
| #endif |
| |
| unsigned KeySize; |
| Byte _key[32]; |
| Byte _iv[16]; |
| |
| HRESULT Set_Key_and_IV(ICryptoProperties *cp) |
| { |
| RINOK(cp->SetKey(_key, KeySize)) |
| return cp->SetInitVector(_iv, sizeof(_iv)); |
| } |
| |
| Byte _psw[16]; |
| |
| bool CheckCrc_Enc; /* = 1, if we want to check packed data crcs after each pass |
| used for filter and usual coders */ |
| bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass |
| used only for filter */ |
| E_CheckCrcMode CheckCrcMode_Dec; |
| |
| struct CDecoderInfo |
| { |
| CEncoderInfo *Encoder; |
| UInt32 DecoderIndex; |
| bool CallbackMode; |
| |
| #ifdef USE_ALLOCA |
| size_t AllocaSize; |
| #endif |
| }; |
| CDecoderInfo decodersInfo[2]; |
| |
| CMyComPtr<ICompressCoder> _decoders[2]; |
| CMyComPtr<ICompressFilter> _decoderFilter; |
| |
| HRESULT Results[2]; |
| CBenchmarkOutStream *outStreamSpec; |
| CMyComPtr<ISequentialOutStream> outStream; |
| IBenchCallback *callback; |
| IBenchPrintCallback *printCallback; |
| UInt32 crc; |
| size_t kBufferSize; |
| size_t compressedSize; |
| const Byte *uncompressedDataPtr; |
| |
| const Byte *fileData; |
| CBenchRandomGenerator rg; |
| |
| CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!! |
| |
| // CBenchmarkOutStream *propStreamSpec; |
| Byte propsData[kMaxMethodPropSize]; |
| CBufPtrSeqOutStream *propStreamSpec; |
| CMyComPtr<ISequentialOutStream> propStream; |
| |
| unsigned generateDictBits; |
| COneMethodInfo _method; |
| |
| // for decode |
| size_t _uncompressedDataSize; |
| |
| HRESULT Generate(); |
| HRESULT Encode(); |
| HRESULT Decode(UInt32 decoderIndex); |
| |
| CEncoderInfo(): |
| #ifndef Z7_ST |
| Common(NULL), |
| IsGlobalMtMode(true), |
| #endif |
| Salt(0), |
| KeySize(0), |
| CheckCrc_Enc(true), |
| UseRealData_Enc(true), |
| CheckCrcMode_Dec(k_CheckCrcMode_Always), |
| outStreamSpec(NULL), |
| callback(NULL), |
| printCallback(NULL), |
| fileData(NULL), |
| propStreamSpec(NULL) |
| {} |
| |
| #ifndef Z7_ST |
| |
| static THREAD_FUNC_DECL EncodeThreadFunction(void *param) |
| { |
| HRESULT res; |
| CEncoderInfo *encoder = (CEncoderInfo *)param; |
| try |
| { |
| #ifdef USE_ALLOCA |
| alloca(encoder->AllocaSize); |
| #endif |
| |
| res = encoder->Encode(); |
| } |
| catch(...) |
| { |
| res = E_FAIL; |
| } |
| encoder->Results[0] = res; |
| if (res != S_OK) |
| encoder->progressInfoSpec[0]->Status->SetResult(res); |
| encoder->ReadyEvent.Set(); |
| return THREAD_FUNC_RET_ZERO; |
| } |
| |
| static THREAD_FUNC_DECL DecodeThreadFunction(void *param) |
| { |
| CDecoderInfo *decoder = (CDecoderInfo *)param; |
| |
| #ifdef USE_ALLOCA |
| alloca(decoder->AllocaSize); |
| #endif |
| |
| CEncoderInfo *encoder = decoder->Encoder; |
| encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); |
| return THREAD_FUNC_RET_ZERO; |
| } |
| |
| HRESULT CreateEncoderThread() |
| { |
| WRes res = 0; |
| if (!ReadyEvent.IsCreated()) |
| res = ReadyEvent.Create(); |
| if (res == 0) |
| res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this, |
| EncoderIndex); |
| return HRESULT_FROM_WIN32(res); |
| } |
| |
| HRESULT CreateDecoderThread(unsigned index, bool callbackMode |
| #ifdef USE_ALLOCA |
| , size_t allocaSize |
| #endif |
| ) |
| { |
| CDecoderInfo &decoder = decodersInfo[index]; |
| decoder.DecoderIndex = index; |
| decoder.Encoder = this; |
| |
| #ifdef USE_ALLOCA |
| decoder.AllocaSize = allocaSize; |
| #endif |
| |
| decoder.CallbackMode = callbackMode; |
| |
| WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder, |
| // EncoderIndex * NumEncoderInternalThreads + index |
| EncoderIndex |
| ); |
| |
| return HRESULT_FROM_WIN32(res); |
| } |
| |
| #endif |
| }; |
| |
| |
| |
| |
| static size_t GetBenchCompressedSize(size_t bufferSize) |
| { |
| return kCompressedAdditionalSize + bufferSize + bufferSize / 16; |
| // kBufferSize / 2; |
| } |
| |
| |
| HRESULT CEncoderInfo::Generate() |
| { |
| const COneMethodInfo &method = _method; |
| |
| // we need extra space, if input data is already compressed |
| const size_t kCompressedBufferSize = _encoderFilter ? |
| kBufferSize : |
| GetBenchCompressedSize(kBufferSize); |
| |
| if (kCompressedBufferSize < kBufferSize) |
| return E_FAIL; |
| |
| uncompressedDataPtr = fileData; |
| if (fileData) |
| { |
| #if !defined(Z7_ST) |
| if (IsGlobalMtMode) |
| { |
| /* we copy the data to local buffer of thread to eliminate |
| using of shared buffer by different threads */ |
| ALLOC_WITH_HRESULT(&rg, kBufferSize) |
| memcpy((Byte *)rg, fileData, kBufferSize); |
| uncompressedDataPtr = (const Byte *)rg; |
| } |
| #endif |
| } |
| else |
| { |
| ALLOC_WITH_HRESULT(&rg, kBufferSize) |
| // DWORD ttt = GetTickCount(); |
| if (generateDictBits == 0) |
| rg.GenerateSimpleRandom(Salt); |
| else |
| { |
| if (generateDictBits >= sizeof(size_t) * 8 |
| && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1))) |
| return E_INVALIDARG; |
| rg.GenerateLz(generateDictBits, Salt); |
| // return E_ABORT; // for debug |
| } |
| // printf("\n%d\n ", GetTickCount() - ttt); |
| |
| crc = CrcCalc((const Byte *)rg, rg.Size()); |
| uncompressedDataPtr = (const Byte *)rg; |
| } |
| |
| if (!outStream) |
| { |
| outStreamSpec = new CBenchmarkOutStream; |
| outStream = outStreamSpec; |
| } |
| |
| ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize) |
| |
| if (_encoderFilter) |
| { |
| /* we try to reduce the number of memcpy() in main encoding loop. |
| so we copy data to temp buffers here */ |
| ALLOC_WITH_HRESULT(&rgCopy, kBufferSize) |
| memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize); |
| memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize); |
| } |
| |
| if (!propStream) |
| { |
| propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream; |
| propStream = propStreamSpec; |
| } |
| // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize); |
| // propStreamSpec->Init(true, false); |
| propStreamSpec->Init(propsData, sizeof(propsData)); |
| |
| |
| CMyComPtr<IUnknown> coder; |
| if (_encoderFilter) |
| coder = _encoderFilter; |
| else |
| coder = _encoder; |
| { |
| CMyComPtr<ICompressSetCoderProperties> scp; |
| coder.QueryInterface(IID_ICompressSetCoderProperties, &scp); |
| if (scp) |
| { |
| const UInt64 reduceSize = kBufferSize; |
| |
| /* in posix new thread uses same affinity as parent thread, |
| so we don't need to send affinity to coder in posix */ |
| UInt64 affMask; |
| #if !defined(Z7_ST) && defined(_WIN32) |
| { |
| CCpuSet cpuSet; |
| affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); |
| } |
| #else |
| affMask = 0; |
| #endif |
| // affMask <<= 3; // debug line: to test no affinity in coder; |
| // affMask = 0; |
| |
| RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL))) |
| } |
| else |
| { |
| if (method.AreThereNonOptionalProps()) |
| return E_INVALIDARG; |
| } |
| |
| CMyComPtr<ICompressWriteCoderProperties> writeCoderProps; |
| coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps); |
| if (writeCoderProps) |
| { |
| RINOK(writeCoderProps->WriteCoderProperties(propStream)) |
| } |
| |
| { |
| CMyComPtr<ICryptoSetPassword> sp; |
| coder.QueryInterface(IID_ICryptoSetPassword, &sp); |
| if (sp) |
| { |
| RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw))) |
| |
| // we must call encoding one time to calculate password key for key cache. |
| // it must be after WriteCoderProperties! |
| Byte temp[16]; |
| memset(temp, 0, sizeof(temp)); |
| |
| if (_encoderFilter) |
| { |
| _encoderFilter->Init(); |
| _encoderFilter->Filter(temp, sizeof(temp)); |
| } |
| else |
| { |
| CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; |
| CMyComPtr<ISequentialInStream> inStream = inStreamSpec; |
| inStreamSpec->Init(temp, sizeof(temp)); |
| |
| CCrcOutStream *crcStreamSpec = new CCrcOutStream; |
| CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec; |
| crcStreamSpec->Init(); |
| |
| RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL)) |
| } |
| } |
| } |
| } |
| |
| return S_OK; |
| } |
| |
| |
| static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc) |
| { |
| while (size != 0) |
| { |
| UInt32 cur = crc ? 1 << 17 : 1 << 24; |
| if (cur > size) |
| cur = (UInt32)size; |
| UInt32 processed = filter->Filter(data, cur); |
| /* if (processed > size) (in AES filter), we must fill last block with zeros. |
| but it is not important for benchmark. So we just copy that data without filtering. |
| if (processed == 0) then filter can't process more */ |
| if (processed > size || processed == 0) |
| processed = (UInt32)size; |
| if (crc) |
| *crc = CrcUpdate(*crc, data, processed); |
| data += processed; |
| size -= processed; |
| } |
| } |
| |
| |
/*
  Encoding pass of the benchmark for this CEncoderInfo.

  Steps:
    1) Generate() is called.
    2) If (Common) is set (multithreading mode), the function signals ReadyEvent,
       waits for Common->StartEvent, and returns S_OK without encoding
       if Common->ExitMode is set.
       Otherwise the start time is set here via progressInfoSpec[0].
    3) The data (kBufferSize bytes) is encoded / filtered (NumIterations) times.
       CRC of output is calculated for selected iterations, and all calculated
       CRC values must be equal.
    4) _encoder and _encoderFilter are released.

  Returns:
    S_OK   : success (or exit request in multithreading mode)
    E_FAIL : input was not fully consumed, compressed size differs between
             iterations, or CRC differs between iterations
    other  : error code from Generate(), events, coder or callbacks
*/
HRESULT CEncoderInfo::Encode()
{
  // printf("\nCEncoderInfo::Generate\n");

  RINOK(Generate())

  // printf("\n2222\n");

  #ifndef Z7_ST
  if (Common)
  {
    Results[0] = S_OK;
    // we report that this thread is ready, and then we wait for common start signal
    WRes wres = ReadyEvent.Set();
    if (wres == 0)
      wres = Common->StartEvent.Lock();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    if (Common->ExitMode)
      return S_OK;
  }
  else
  #endif
  {
    CBenchProgressInfo *bpi = progressInfoSpec[0];
    bpi->SetStartTime();
  }


  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
  bi.UnpackSize = 0;
  bi.PackSize = 0;
  CMyComPtr<ICryptoProperties> cp;
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  // for filter: packed size is equal to unpacked size.
  // for coder: (compressedSize) will be set after first iteration.
  compressedSize = 0;
  if (_encoderFilter)
    compressedSize = kBufferSize;

  // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
  UInt64 prev = 0;

  // (mask == 0)      : CRC is calculated in each iteration
  // (mask == 0xFFFF) : CRC is calculated only for iterations where low 16 bits of (i) are zero,
  //                    and only if (NumIterations > 0xFFFF)
  const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
  const bool useCrc = (mask < NumIterations);
  bool crcPrev_defined = false;
  UInt32 crcPrev = 0;

  bool useRealData_Enc = UseRealData_Enc;
  bool data_Was_Changed = false;
  if (useRealData_Enc)
  {
    /* we want memcpy() for each iteration including first iteration.
       So results will be equal for different number of iterations */
    data_Was_Changed = true;
  }

  const UInt64 numIterations = NumIterations;
  UInt64 i = numIterations;
  // printCallback->NewLine();

  // (i) counts down: first iteration works with (i == numIterations - 1)
  while (i != 0)
  {
    i--;
    // we check for break request after each (1 << 26) bytes of unpacked data
    if (printCallback && bi.UnpackSize - prev >= (1 << 26))
    {
      prev = bi.UnpackSize;
      RINOK(printCallback->CheckBreak())
    }

    /*
    CBenchInfo info;
    progressInfoSpec[0]->SetStartTime();
    */

    bool calcCrc = false;
    if (useCrc)
      calcCrc = (((UInt32)i & mask) == 0);

    if (_encoderFilter)
    {
      // filter works in place. By default it processes (rgCopy) buffer.
      // For first iteration, for CRC iterations and for real data mode
      // it processes the buffer of (outStreamSpec).
      Byte *filterData = rgCopy;
      if (i == numIterations - 1 || calcCrc || useRealData_Enc)
      {
        filterData = (Byte *)*outStreamSpec;
        // the buffer is restored from source data, if it was changed by previous filtering
        if (data_Was_Changed)
          memcpy(filterData, uncompressedDataPtr, kBufferSize);
        data_Was_Changed = true;
      }
      _encoderFilter->Init();
      if (calcCrc)
        outStreamSpec->InitCrc();
      My_FilterBench(_encoderFilter, filterData, kBufferSize,
          calcCrc ? &outStreamSpec->Crc : NULL);
    }
    else
    {
      outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
      if (!inStreamSpec->WasFinished())
        return E_FAIL;
      // compressed size must be the same for all iterations
      if (compressedSize != outStreamSpec->Pos)
      {
        if (compressedSize != 0)
          return E_FAIL;
        compressedSize = outStreamSpec->Pos;
      }
    }

    // outStreamSpec->Print();

    if (calcCrc)
    {
      // CRC of output must be the same for all iterations where it's calculated
      const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
      if (crcPrev_defined && crcPrev != crc2)
        return E_FAIL;
      crcPrev = crc2;
      crcPrev_defined = true;
    }

    bi.UnpackSize += kBufferSize;
    bi.PackSize += compressedSize;

    /*
    {
      progressInfoSpec[0]->SetFinishTime(info);
      info.UnpackSize = 0;
      info.PackSize = 0;
      info.NumIterations = 1;

      info.UnpackSize = kBufferSize;
      info.PackSize = compressedSize;
      // printf("\n%7d\n", encoder.compressedSize);

      RINOK(callback->SetEncodeResult(info, true))
      printCallback->NewLine();
    }
    */

  }

  _encoder.Release();
  _encoderFilter.Release();
  return S_OK;
}
| |
| |
/*
  Decoding pass of the benchmark for decoder (decoderIndex) of this CEncoderInfo.

  The function sets up the decoder (or decoder filter): number of threads,
  coder properties, decoder properties written by encoder, password, key and IV.
  Then it decodes (compressedSize) bytes from the buffer of (outStreamSpec)
  (NumIterations) times, and checks the CRC of decoded data against (crc)
  according to (CheckCrcMode_Dec).

  decoderIndex : index in _decoders[] / progressInfoSpec[] / progressInfo[].
                 It must be 0, if decoder filter is used.

  Returns:
    S_OK    : success
    S_FALSE : decoded data is not correct (input was not finished,
              wrong processed size, wrong output size, or CRC mismatch)
    E_FAIL  : wrong configuration (decoderIndex != 0 for filter, no interface
              for decoder properties, or (compressedSize > rgCopy.Size()) for filter)
    other   : error code from coder or callbacks
*/
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  // if encoder wrote some properties, decoder must support the interface to receive them
  if (!setDecProps && propStreamSpec->GetPos() != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef Z7_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    const UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize))
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);

  if (setDecProps)
  {
    RINOK(setDecProps->SetDecoderProperties2(
        /* (const Byte *)*propStreamSpec, */
        propsData,
        (UInt32)propStreamSpec->GetPos()))
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
    }
  }

  UInt64 prev = 0;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  CMyComPtr<ICompressSetFinishMode> setFinishMode;

  if (_decoderFilter)
  {
    // (rgCopy) can be used as work buffer for filter, so it must be big enough
    if (compressedSize > rgCopy.Size())
      return E_FAIL;
  }
  else
  {
    decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
  }

  const UInt64 numIterations = NumIterations;
  const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;

  for (UInt64 i = 0; i < numIterations; i++)
  {
    // we check for break request after each (1 << 26) bytes of unpacked data
    if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
    {
      RINOK(printCallback->CheckBreak())
      prev = pi->BenchInfo.UnpackSize;
    }

    const UInt64 outSize = kBufferSize;
    bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);

    crcOutStreamSpec->Init();

    if (_decoderFilter)
    {
      // filter works in place.
      // k_CheckCrcMode_Always : each iteration works with fresh copy of packed data in (rgCopy), and CRC is checked.
      // other modes           : filter works with the buffer of (outStreamSpec) directly,
      //                         and CRC can be checked only for first iteration.
      Byte *filterData = (Byte *)*outStreamSpec;
      if (calcCrc)
      {
        calcCrc = (i == 0);
        if (checkCrcMode == k_CheckCrcMode_Always)
        {
          calcCrc = true;
          memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
          filterData = rgCopy;
        }
      }
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, filterData, compressedSize,
          calcCrc ? &crcOutStreamSpec->Crc : NULL);
    }
    else
    {
      crcOutStreamSpec->CalcCrc = calcCrc;
      inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);

      if (setFinishMode)
      {
        RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
      }

      RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))

      if (setFinishMode)
      {
        // in finish mode the decoder must consume the whole packed stream
        if (!inStreamSpec->WasFinished())
          return S_FALSE;

        CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
        decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);

        if (getInStreamProcessedSize)
        {
          UInt64 processed;
          RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
          if (processed != compressedSize)
            return S_FALSE;
        }
      }

      if (crcOutStreamSpec->Pos != outSize)
        return S_FALSE;
    }

    // (crc) is the expected CRC of unpacked data
    if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;

    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }

  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
| |
| |
// (1 << 12) == 4096. Presumably it's the upper limit for the number of benchmark threads.
// NOTE(review): the constant is not referenced in this part of the file - confirm the usage.
static const UInt32 kNumThreadsMax = (1 << 12);
| |
| struct CBenchEncoders |
| { |
| CEncoderInfo *encoders; |
| CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; } |
| ~CBenchEncoders() { delete []encoders; } |
| }; |
| |
| |
| static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands) |
| { |
| if (numCommands < (1 << 4)) |
| numCommands = (1 << 4); |
| UInt64 res = complexInCommands / numCommands; |
| return (res == 0 ? 1 : res); |
| } |
| |
| |
| |
| #ifndef Z7_ST |
| |
| // ---------- CBenchThreadsFlusher ---------- |
| |
| struct CBenchThreadsFlusher |
| { |
| CBenchEncoders *EncodersSpec; |
| CBenchSyncCommon Common; |
| unsigned NumThreads; |
| bool NeedClose; |
| |
| CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {} |
| |
| ~CBenchThreadsFlusher() |
| { |
| StartAndWait(true); |
| } |
| |
| WRes StartAndWait(bool exitMode = false); |
| }; |
| |
| |
| WRes CBenchThreadsFlusher::StartAndWait(bool exitMode) |
| { |
| if (!NeedClose) |
| return 0; |
| |
| Common.ExitMode = exitMode; |
| WRes res = Common.StartEvent.Set(); |
| |
| for (unsigned i = 0; i < NumThreads; i++) |
| { |
| NWindows::CThread &t = EncodersSpec->encoders[i].thread[0]; |
| if (t.IsCreated()) |
| { |
| WRes res2 = t.Wait_Close(); |
| if (res == 0) |
| res = res2; |
| } |
| } |
| NeedClose = false; |
| return res; |
| } |
| |
| #endif // Z7_ST |
| |
| |
| |
| static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue) |
| { |
| for (size_t i = 0; i < size; i++) |
| { |
| data[i] = (Byte)startValue; |
| startValue++; |
| } |
| } |
| |
| |
| |
/*
  Benchmark of one method (encoding pass and then decoding pass).

  Steps:
    1) The codec for (method2.MethodName) is found. Only codecs with one stream are supported.
    2) (numEncoderThreads) CEncoderInfo items are created.
       Each item gets encoder (or encoder filter) and (numSubDecoderThreads) decoders.
    3) Encode: in multithreading / affinity mode each encoder works in its own thread:
       threads prepare data, report ReadyEvent, and then all threads are started with
       common StartEvent. In another case encoders[0].Encode() is called directly.
       The result is reported with callback->SetEncodeResult().
    4) Decode: decoder threads are created, or Decode(0) is called directly.
       The result is reported with callback->SetDecodeResult().

  complexInCommands   : the complexity budget that is used to get number of iterations
  oldLzmaBenchMode    : if it's set, LZMA with more than one internal thread uses
                        ((numThreads + 1) / 2) encoders and 2 decoders per encoder
  numThreads          : requested number of threads
  affinityMode        : affinity settings; it also can switch the code to threads mode
  method2             : method name and properties
  uncompressedDataSize: size of data for each encoder
  fileData            : real data for benchmark. If it's NULL, the data is generated by encoder
  generateDictBits    : parameter for data generation (it's passed to each encoder)
  printCallback       : can be NULL. It's used for CheckBreak()
  callback            : receiver of encoding / decoding results
  benchProps          : complexity properties of the method

  Returns:
    S_OK         : success
    E_NOTIMPL    : codec was not found, or coder object was not created
    E_INVALIDARG : (numStreams != 1)
    other        : error code from threads, coders or callbacks
*/
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    #ifndef Z7_ST
      bool oldLzmaBenchMode,
      UInt32 numThreads,
      const CAffinityMode *affinityMode,
    #endif
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  bool isFilter;
  const int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams, isFilter);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef Z7_ST
    numEncoderThreads = numThreads;

    if (oldLzmaBenchMode)
    if (methodId == k_LZMA)
    {
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = (numThreads + 1) / 2; // 20.03
        numSubDecoderThreads = 2;
      }
    }

    // threads are created for encoders also for one encoder, if affinity is required
    const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();

  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // ---------- creation of coders ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // only first encoder reports to (callback)
    encoder.callback = (i == 0) ? callback : NULL;
    encoder.printCallback = printCallback;

    #ifndef Z7_ST
    encoder.EncoderIndex = i;
    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
    encoder.AffinityMode = *affinityMode;

    /*
    if (numSubDecoderThreads > 1)
    if (encoder.AffinityMode.NeedAffinity()
        && encoder.AffinityMode.NumBundleThreads == 1)
    {
      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
        encoder.AffinityMode.NumBundleThreads *= 2;
    }
    */

    #endif

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // fixed pseudo data for IV, key and password
    SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }

    // for methods with low complexity (<= 30) we don't check CRC in each iteration
    encoder.UseRealData_Enc =
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    if (benchProps->DecComplexCompr +
        benchProps->DecComplexUnc <= 30)
      encoder.CheckCrcMode_Dec =
          k_CheckCrcMode_FirstPass; // for filters
          // k_CheckCrcMode_Never; // for debug
          // k_CheckCrcMode_Always; // for debug
    if (fileData)
    {
      encoder.UseRealData_Enc = true;
      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    }
  }

  // (crc) of real data is calculated here. If there is no (fileData), (crc = 0) is passed to encoders.
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef Z7_ST
  // (encoderFlusher) is declared after (encodersSpec).
  // So its destructor waits for encoder threads before the encoders are deleted.
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
    // encoder.NumIterations = 3;
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    encoder.KeySize = benchProps->KeySize;

    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef Z7_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      // different stack shift for each thread
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      encoder.IsGlobalMtMode = numEncoderThreads > 1;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak())
  }

  #ifndef Z7_ST
  if (mtEncMode)
  {
    // we wait until all encoder threads are ready
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      const WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0])
    }

    // start time is set after the preparation in threads, and before common start signal
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    const WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode())
  }

  RINOK(status.Res)

  CBenchInfo info;

  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  // sizes in (info) are the sums of sizes of one iteration for all encoders
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true))




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
  #ifndef Z7_ST
  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    /*
    #ifndef Z7_ST
    // encoder.affinityMode = *affinityMode;
    if (encoder.NumEncoderInternalThreads != 1)
      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
    #endif
    */


    if (i == 0)
    {
      // number of iterations is calculated from sizes of first encoder,
      // and the same number is used for all another encoders
      encoder.NumIterations = GetNumIterations(
          benchProps->GetNumCommands_Dec(
              encoder.compressedSize,
              encoder.kBufferSize),
          complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef Z7_ST
    {
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
    }
    if (mtDecoderMode)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res)
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0))
    }
  }

  #ifndef Z7_ST
  if (mtDecoderMode)
  {
    // we wait for all decoder threads, and we keep first error of each type
    WRes wres = 0;
    HRESULT res = S_OK;
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        const WRes wres2 = encoder.thread[j].
            // Wait(); // later we can get thread times from thread in UNDER_CE
            Wait_Close();
        if (wres == 0 && wres2 != 0)
          wres = wres2;
        const HRESULT res2 = encoder.Results[j];
        if (res == 0 && res2 != 0)
          res = res2;
      }
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    RINOK(res)
  }
  #endif // Z7_ST

  RINOK(status.Res)
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  /*
  #ifndef Z7_ST
  #ifdef UNDER_CE
  if (mtDecoderMode)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif
  */

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
  RINOK(callback->SetDecodeResult(info, true))

  return S_OK;
}
| |
| |
| |
| static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog) |
| { |
| /* |
| if (dictSizeLog < 32) |
| return (UInt32)1 << dictSizeLog; |
| else |
| return (UInt32)(Int32)-1; |
| */ |
| return (UInt64)1 << dictSizeLog; |
| } |
| |
| |
| // it's limit of current LZMA implementation that can be changed later |
| #define kLzmaMaxDictSize ((UInt32)15 << 28) |
| |
| static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict) |
| { |
| if (dict == 0) |
| dict = 1; |
| if (dict > kLzmaMaxDictSize) |
| dict = kLzmaMaxDictSize; |
| UInt32 hs = (UInt32)dict - 1; |
| hs |= (hs >> 1); |
| hs |= (hs >> 2); |
| hs |= (hs >> 4); |
| hs |= (hs >> 8); |
| hs >>= 1; |
| hs |= 0xFFFF; |
| if (hs > (1 << 24)) |
| hs >>= 1; |
| hs++; |
| hs += (1 << 16); |
| |
| const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16); |
| UInt64 blockSize = (UInt64)dict + (1 << 16) |
| + (multiThread ? (1 << 20) : 0); |
| blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)); |
| if (blockSize >= kBlockSizeMax) |
| blockSize = kBlockSizeMax; |
| |
| UInt64 son = (UInt64)dict; |
| if (btMode) |
| son *= 2; |
| const UInt64 v = (hs + son) * 4 + blockSize + |
| (1 << 20) + (multiThread ? (6 << 20) : 0); |
| |
| // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20)); |
| // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20)); |
| return v; |
| } |
| |
| |
| UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench) |
| { |
| const size_t kBufferSize = (size_t)dictionary + kAdditionalSize; |
| const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2; |
| if (level < 0) |
| level = 5; |
| const int algo = (level < 5 ? 0 : 1); |
| const int btMode = (algo == 0 ? 0 : 1); |
| |
| UInt32 numBigThreads = numThreads; |
| bool lzmaMt = (totalBench || (numThreads > 1 && btMode)); |
| if (btMode) |
| { |
| if (!totalBench && lzmaMt) |
| numBigThreads /= 2; |
| } |
| return ((UInt64)kBufferSize + kCompressedBufferSize + |
| GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads; |
| } |
| |
| static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary) |
| { |
| // dictionary += (dictionary >> 9); // for page tables (virtual memory) |
| return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20); |
| } |
| |
| |
| // ---------- CRC and HASH ---------- |
| |
| struct CCrcInfo_Base |
| { |
| CMidAlignedBuffer Buffer; |
| const Byte *Data; |
| size_t Size; |
| bool CreateLocalBuf; |
| UInt32 CheckSum_Res; |
| |
| CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {} |
| |
| HRESULT Generate(const Byte *data, size_t size); |
| HRESULT CrcProcess(UInt64 numIterations, |
| const UInt32 *checkSum, IHasher *hf, |
| IBenchPrintCallback *callback); |
| }; |
| |
| |
| HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size) |
| { |
| Size = size; |
| Data = data; |
| if (!data || CreateLocalBuf) |
| { |
| ALLOC_WITH_HRESULT(&Buffer, size) |
| Data = Buffer; |
| } |
| if (!data) |
| RandGen(Buffer, size); |
| else if (CreateLocalBuf && size != 0) |
| memcpy(Buffer, data, size); |
| return S_OK; |
| } |
| |
| |
| HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations, |
| const UInt32 *checkSum, IHasher *hf, |
| IBenchPrintCallback *callback) |
| { |
| MY_ALIGN(16) |
| Byte hash[64]; |
| memset(hash, 0, sizeof(hash)); |
| |
| CheckSum_Res = 0; |
| |
| const UInt32 hashSize = hf->GetDigestSize(); |
| if (hashSize > sizeof(hash)) |
| return S_FALSE; |
| |
| const Byte *buf = Data; |
| const size_t size = Size; |
| UInt32 checkSum_Prev = 0; |
| |
| UInt64 prev = 0; |
| UInt64 cur = 0; |
| |
| for (UInt64 i = 0; i < numIterations; i++) |
| { |
| hf->Init(); |
| size_t pos = 0; |
| do |
| { |
| const size_t rem = size - pos; |
| const UInt32 kStep = ((UInt32)1 << 31); |
| const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep; |
| hf->Update(buf + pos, curSize); |
| pos += curSize; |
| } |
| while (pos != size); |
| |
| hf->Final(hash); |
| UInt32 sum = 0; |
| for (UInt32 j = 0; j < hashSize; j += 4) |
| { |
| sum = rotlFixed(sum, 11); |
| sum += GetUi32(hash + j); |
| } |
| if (checkSum) |
| { |
| if (sum != *checkSum) |
| return S_FALSE; |
| } |
| else |
| { |
| checkSum_Prev = sum; |
| checkSum = &checkSum_Prev; |
| } |
| if (callback) |
| { |
| cur += size; |
| if (cur - prev >= ((UInt32)1 << 30)) |
| { |
| prev = cur; |
| RINOK(callback->CheckBreak()) |
| } |
| } |
| } |
| CheckSum_Res = checkSum_Prev; |
| return S_OK; |
| } |
| |
extern
UInt32 g_BenchCpuFreqTemp; // we need non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;
| |
// YY1 is one step of CPU frequency test: 2 operations with (sum) and (val).
// Each next macro repeats the previous macro 4 times:
//   YY3 = 4 * YY1, YY5 = 16 * YY1, YY7 = 64 * YY1 (128 operations).
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// it's equal to the number of operations in YY7
static const UInt32 kNumFreqCommands = 128;
| |
| EXTERN_C_BEGIN |
| |
/*
  The kernel of CPU frequency test.
  It executes YY7 (the chain of (sum += val; sum ^= val;) operations) (num) times.
    sum : start value
    num : number of loop iterations
    val : operand for each operation
  Returns the final value of (sum).
*/
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}
| |
| EXTERN_C_END |
| |
| |
| #ifndef Z7_ST |
| |
| struct CBaseThreadInfo |
| { |
| NWindows::CThread Thread; |
| IBenchPrintCallback *Callback; |
| HRESULT CallbackRes; |
| |
| WRes Wait_If_Created() |
| { |
| if (!Thread.IsCreated()) |
| return 0; |
| return Thread.Wait_Close(); |
| } |
| }; |
| |
// Parameters and result for one thread of CPU frequency test (FreqThreadFunction).
struct CFreqInfo: public CBaseThreadInfo
{
  UInt32 ValRes;        // result: final (sum) value. It's written by the thread
  UInt32 Size;          // (num) argument for each CountCpuFreq() call
  UInt64 NumIterations; // number of CountCpuFreq() calls
};
| |
| static THREAD_FUNC_DECL FreqThreadFunction(void *param) |
| { |
| CFreqInfo *p = (CFreqInfo *)param; |
| |
| UInt32 sum = g_BenchCpuFreqTemp; |
| for (UInt64 k = p->NumIterations; k > 0; k--) |
| { |
| if (p->Callback) |
| { |
| p->CallbackRes = p->Callback->CheckBreak(); |
| if (p->CallbackRes != S_OK) |
| break; |
| } |
| sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp); |
| } |
| p->ValRes = sum; |
| return THREAD_FUNC_RET_ZERO; |
| } |
| |
| struct CFreqThreads |
| { |
| CFreqInfo *Items; |
| UInt32 NumThreads; |
| |
| CFreqThreads(): Items(NULL), NumThreads(0) {} |
| |
| WRes WaitAll() |
| { |
| WRes wres = 0; |
| for (UInt32 i = 0; i < NumThreads; i++) |
| { |
| WRes wres2 = Items[i].Wait_If_Created(); |
| if (wres == 0 && wres2 != 0) |
| wres = wres2; |
| } |
| NumThreads = 0; |
| return wres; |
| } |
| |
| ~CFreqThreads() |
| { |
| WaitAll(); |
| delete []Items; |
| } |
| }; |
| |
| |
| static THREAD_FUNC_DECL CrcThreadFunction(void *param); |
| |
| struct CCrcInfo: public CBaseThreadInfo |
| { |
| const Byte *Data; |
| size_t Size; |
| UInt64 NumIterations; |
| bool CheckSumDefined; |
| UInt32 CheckSum; |
| CMyComPtr<IHasher> Hasher; |
| HRESULT Res; |
| UInt32 CheckSum_Res; |
| |
| #ifndef Z7_ST |
| NSynchronization::CManualResetEvent ReadyEvent; |
| UInt32 ThreadIndex; |
| CBenchSyncCommon *Common; |
| CAffinityMode AffinityMode; |
| #endif |
| |
| // we want to call CCrcInfo_Base::Buffer.Free() in main thread. |
| // so we uses non-local CCrcInfo_Base. |
| CCrcInfo_Base crcib; |
| |
| HRESULT CreateThread() |
| { |
| WRes res = 0; |
| if (!ReadyEvent.IsCreated()) |
| res = ReadyEvent.Create(); |
| if (res == 0) |
| res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this, |
| ThreadIndex); |
| return HRESULT_FROM_WIN32(res); |
| } |
| |
| #ifdef USE_ALLOCA |
| size_t AllocaSize; |
| #endif |
| |
| void Process(); |
| |
| CCrcInfo(): Res(E_FAIL) {} |
| }; |
| |
| static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test |
| // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test |
| |
| void CCrcInfo::Process() |
| { |
| crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File; |
| // we can use additional Generate() passes to reduce some time effects for new page allocation |
| // for (unsigned y = 0; y < 10; y++) |
| Res = crcib.Generate(Data, Size); |
| |
| // if (Common) |
| { |
| WRes wres = ReadyEvent.Set(); |
| if (wres != 0) |
| { |
| if (Res == 0) |
| Res = HRESULT_FROM_WIN32(wres); |
| return; |
| } |
| if (Res != 0) |
| return; |
| |
| wres = Common->StartEvent.Lock(); |
| |
| if (wres != 0) |
| { |
| Res = HRESULT_FROM_WIN32(wres); |
| return; |
| } |
| if (Common->ExitMode) |
| return; |
| } |
| |
| Res = crcib.CrcProcess(NumIterations, |
| CheckSumDefined ? &CheckSum : NULL, Hasher, |
| Callback); |
| CheckSum_Res = crcib.CheckSum_Res; |
| /* |
| We don't want to include the time of slow CCrcInfo_Base::Buffer.Free() |
| to time of benchmark. So we don't free Buffer here |
| */ |
| // crcib.Buffer.Free(); |
| } |
| |
| |
| static THREAD_FUNC_DECL CrcThreadFunction(void *param) |
| { |
| CCrcInfo *p = (CCrcInfo *)param; |
| |
| #ifdef USE_ALLOCA |
| alloca(p->AllocaSize); |
| #endif |
| p->Process(); |
| return THREAD_FUNC_RET_ZERO; |
| } |
| |
| |
| struct CCrcThreads |
| { |
| CCrcInfo *Items; |
| unsigned NumThreads; |
| CBenchSyncCommon Common; |
| bool NeedClose; |
| |
| CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {} |
| |
| WRes StartAndWait(bool exitMode = false); |
| |
| ~CCrcThreads() |
| { |
| StartAndWait(true); |
| delete []Items; |
| } |
| }; |
| |
| |
| WRes CCrcThreads::StartAndWait(bool exitMode) |
| { |
| if (!NeedClose) |
| return 0; |
| |
| Common.ExitMode = exitMode; |
| WRes wres = Common.StartEvent.Set(); |
| |
| for (unsigned i = 0; i < NumThreads; i++) |
| { |
| WRes wres2 = Items[i].Wait_If_Created(); |
| if (wres == 0 && wres2 != 0) |
| wres = wres2; |
| } |
| NumThreads = 0; |
| NeedClose = false; |
| return wres; |
| } |
| |
| #endif |
| |
| |
| static UInt32 CrcCalc1(const Byte *buf, size_t size) |
| { |
| UInt32 crc = CRC_INIT_VAL; |
| for (size_t i = 0; i < size; i++) |
| crc = CRC_UPDATE_BYTE(crc, buf[i]); |
| return CRC_GET_DIGEST(crc); |
| } |
| |
| /* |
| static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG) |
| { |
| RandGen(buf, size, RG); |
| return CrcCalc1(buf, size); |
| } |
| */ |
| |
| static bool CrcInternalTest() |
| { |
| CAlignedBuffer buffer; |
| const size_t kBufferSize0 = (1 << 8); |
| const size_t kBufferSize1 = (1 << 10); |
| const unsigned kCheckSize = (1 << 5); |
| buffer.Alloc(kBufferSize0 + kBufferSize1); |
| if (!buffer.IsAllocated()) |
| return false; |
| Byte *buf = (Byte *)buffer; |
| size_t i; |
| for (i = 0; i < kBufferSize0; i++) |
| buf[i] = (Byte)i; |
| UInt32 crc1 = CrcCalc1(buf, kBufferSize0); |
| if (crc1 != 0x29058C73) |
| return false; |
| RandGen(buf + kBufferSize0, kBufferSize1); |
| for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) |
| for (unsigned j = 0; j < kCheckSize; j++) |
| if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) |
| return false; |
| return true; |
| } |
| |
// One item of (g_Bench) table. The order of fields must match the order of values in the table.
// NOTE(review): the meaning of fields is derived from names - confirm against the code that reads (g_Bench).
struct CBenchMethod
{
  unsigned Weight;       // presumably the weight of the method in total rating
  unsigned DictBits;     // presumably log2 of dictionary size. It's 0 for filters and AES in the table
  Int32 EncComplex;      // presumably encoding complexity
  Int32 DecComplexCompr; // presumably decoding complexity related to compressed size
  Int32 DecComplexUnc;   // presumably decoding complexity related to uncompressed size
  const char *Name;      // method name with properties, like "LZMA:x5:mt2"
  // unsigned KeySize;
};
| |
// if USE_SW_CMPLX is defined, the values in CMPLX() are multiplied by 1000.
// #define USE_SW_CMPLX

#ifdef USE_SW_CMPLX
#define CMPLX(x) ((x) * 1000)
#else
#define CMPLX(x) (x)
#endif

// The table of methods for benchmark.
// The columns are the fields of CBenchMethod:
//   Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name
static const CBenchMethod g_Bench[] =
{
  // { 40, 17,  357,  145,   20, "LZMA:x1" },
  // { 20, 18,  360,  145,   20, "LZMA2:x1:mt2" },

  { 20, 18,  360,  145,   20, "LZMA:x1" },
  { 20, 22,  600,  145,   20, "LZMA:x3" },

  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  // { 10, 18, 1010,    0, 1150, "PPMDZip:x1" },
  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  // { 10, 22, 1655,    0, 1830, "PPMDZip:x5" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // {  2,  0,  -16,    0,  -16, "Swap2" },
  {  2,  0,  -16,    0,  -16, "Swap4" },

  // {  2,  0,    3,    0,    4, "Delta:1" },
  // {  2,  0,    3,    0,    4, "Delta:2" },
  // {  2,  0,    3,    0,    4, "Delta:3" },
  {  2,  0,    3,    0,    4, "Delta:4" },
  // {  2,  0,    3,    0,    4, "Delta:8" },
  // {  2,  0,    3,    0,    4, "Delta:32" },

  {  2,  0,    2,    0,    2, "BCJ" },
  {  2,  0,    1,    0,    1, "ARM64" },

  // { 10,  0,   18,    0,   18, "AES128CBC:1" },
  // { 10,  0,   21,    0,   21, "AES192CBC:1" },
  { 10,  0,   24,    0,   24, "AES256CBC:1" },

  // { 10,  0,   18,    0,   18, "AES128CTR:1" },
  // { 10,  0,   21,    0,   21, "AES192CTR:1" },
  // { 10,  0,   24,    0,   24, "AES256CTR:1" },
  // {  2,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
  // {  2,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
  {  2,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },

  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },

  // {  1,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" },
  // {  1,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" },
  {  1,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" }

  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" },
  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" },
  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" },
};
| |
| struct CBenchHash /* One row of the g_Hash table that TotalBench_Hash() iterates over. */ |
| { |
| unsigned Weight; /* passed to CrcBench() as benchWeight */ |
| UInt32 Complex; /* passed to CrcBench() as complexity; there the command count is (bytes * complexity / 256) */ |
| UInt32 CheckSum; /* expected checksum; TotalBench_Hash() passes it only when no file data is used and the buffer size is (1 << kNumHashDictBits) */ |
| const char *Name; /* hash method string: matched against the user's wildcard mask, then parsed by ParseMethodFromPROPVARIANT() */ |
| }; |
| |
| // #define ARM_CRC_MUL 100 |
| #define ARM_CRC_MUL 1 /* multiplier applied to the Complex value of the "CRC32:32" and "CRC32:64" rows of g_Hash */ |
| |
| #define k_Hash_Complex_Mult 256 /* scale that CrcBench() multiplies complexInCommands by before dividing by a Complex value */ |
| |
| static const CBenchHash g_Hash[] = /* columns: Weight, Complex, CheckSum, Name (see CBenchHash) */ |
| { |
| // { 1, 1820, 0x21e207bb, "CRC32:1" }, |
| // { 10, 558, 0x21e207bb, "CRC32:4" }, |
| { 20, 339, 0x21e207bb, "CRC32:8" } , |
| { 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" }, |
| { 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" }, |
| { 10, 512, 0x41b901d1, "CRC64" }, |
| |
| { 10, 5100, 0x7913ba03, "SHA256:1" }, |
| { 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" }, |
| |
| { 10, 2340, 0xff769021, "SHA1:1" }, |
| { 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" }, |
| |
| { 2, 5500, 0x85189d02, "BLAKE2sp" } |
| }; |
| |
| // Prints (value) as decimal text, right-aligned in a field of (size) columns, |
| // with one extra space in front of the field as a column separator. |
| static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size) |
| { |
|   char buf[128]; |
|   // The digits are written near the end of (buf) and everything before them |
|   // is filled with spaces, so padding is done by moving the start position back. |
|   const unsigned textOffset = (unsigned)sizeof(buf) - 32; |
|   memset(buf, ' ', textOffset); |
|   ConvertUInt64ToString(value, buf + textOffset); |
| |
|   unsigned first = textOffset - 1; // include the separator space |
|   const unsigned width = size + 1; |
|   const unsigned textLen = (unsigned)strlen(buf + first); |
|   if (width > textLen) |
|   { |
|     const unsigned pad = width - textLen; |
|     // never move in front of the buffer: very wide fields are clipped |
|     first = (first < pad) ? 0 : first - pad; |
|   } |
|   f.Print(buf + first); |
| } |
| |
| static const unsigned kFieldSize_Name = 12; /* width of the method name column printed with PrintLeft() */ |
| static const unsigned kFieldSize_SmallName = 4; /* width used by Print_Pow() */ |
| static const unsigned kFieldSize_Speed = 9; /* width of the speed column */ |
| static const unsigned kFieldSize_Usage = 5; /* width of the CPU usage column */ |
| static const unsigned kFieldSize_RU = 6; /* width of the rating-per-usage column */ |
| static const unsigned kFieldSize_Rating = 6; /* width of the rating column */ |
| static const unsigned kFieldSize_EU = 5; /* width of the first percent column printed when a CPU frequency is known */ |
| static const unsigned kFieldSize_Effec = 5; /* width of the second percent column printed when a CPU frequency is known */ |
| static const unsigned kFieldSize_CrcSpeed = 8; /* width used by Bench_BW_Print_Usage_Speed() for the speed value */ |
| |
| |
| static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating; /* NOTE(review): the 4 presumably covers the separator space PrintNumber() adds in front of each of the four columns - confirm */ |
| static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec; /* blank width printed by PrintResults() when cpuFreq is 0 */ |
| |
| |
| // Prints (rating) divided by one million, rounded to the nearest integer. |
| static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size) |
| { |
|   const UInt64 kUnit = 1000000; |
|   const UInt64 rounded = (rating + kUnit / 2) / kUnit; |
|   PrintNumber(f, rounded, size); |
| } |
| |
| |
| // Prints (val / divider) as a rounded percent value; a zero divider prints 0. |
| static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size) |
| { |
|   UInt64 percents; |
|   if (divider == 0) |
|     percents = 0; |
|   else |
|     percents = (val * 100 + divider / 2) / divider; |
|   PrintNumber(f, percents, size); |
| } |
| |
| // Prints the character (c) repeated (size) times. |
| // The text is built in a fixed stack buffer, so it is emitted in chunks: |
| // a (size) of 256 or more no longer writes past the end of the buffer. |
| // For (size) < 256 there is exactly one Print() call, as before |
| // (an empty string is still printed for size == 0). |
| static void PrintChars(IBenchPrintCallback &f, char c, unsigned size) |
| { |
|   char s[256]; |
|   const unsigned kChunkMax = (unsigned)sizeof(s) - 1; // one byte is kept for the terminator |
|   do |
|   { |
|     const unsigned cur = (size < kChunkMax) ? size : kChunkMax; |
|     memset(s, (Byte)c, cur); |
|     s[cur] = 0; |
|     f.Print(s); |
|     size -= cur; |
|   } |
|   while (size != 0); |
| } |
| |
| // Prints (size) space characters. |
| static void PrintSpaces(IBenchPrintCallback &f, unsigned size) |
| { |
|   const char kSpace = ' '; |
|   PrintChars(f, kSpace, size); |
| } |
| |
| // Prints the value that Benchmark_GetUsage_Percents() returns for (usage). |
| static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size) |
| { |
|   const UInt64 percents = Benchmark_GetUsage_Percents(usage); |
|   PrintNumber(f, percents, size); |
| } |
| |
| // Prints the usage, rating-per-usage and rating columns. |
| // If (showFreq) is set, two more percent columns follow: |
| // (rating) relative to (cpuFreq) scaled by (usage), and (rating) relative to (cpuFreq). |
| // These columns are left blank when (cpuFreq) is 0. |
| static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq) |
| { |
|   PrintUsage(f, usage, kFieldSize_Usage); |
|   PrintRating(f, rpu, kFieldSize_RU); |
|   PrintRating(f, rating, kFieldSize_Rating); |
|   if (!showFreq) |
|     return; |
|   if (cpuFreq == 0) |
|   { |
|     PrintSpaces(f, kFieldSize_EUAndEffec); |
|     return; |
|   } |
|   PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU); |
|   PrintPercents(f, rating, cpuFreq, kFieldSize_Effec); |
| } |
| |
| |
| // Fills Speed, Usage and RPU from (info). |
| // Rating is an input here: it is passed to GetRatingPerUsage(), |
| // so the caller must set it before this call. |
| void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info) |
| { |
|   RPU = info.GetRatingPerUsage(Rating); |
|   Usage = info.GetUsage(); |
|   Speed = info.GetUnpackSizeSpeed(); |
| } |
| |
| // Multiplies every accumulated value, including the iteration counter, by (weight). |
| void CTotalBenchRes::Mult_For_Weight(unsigned weight) |
| { |
|   Speed *= weight; |
|   Usage *= weight; |
|   Rating *= weight; |
|   RPU *= weight; |
|   NumIterations2 *= weight; |
| } |
| |
| // Adds every value of (r), including its iteration counter, to this object. |
| void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r) |
| { |
|   Speed += r.Speed; |
|   Usage += r.Usage; |
|   RPU += r.RPU; |
|   Rating += r.Rating; |
|   NumIterations2 += r.NumIterations2; |
| } |
| |
| // Builds one result row from (info) and (rating). |
| // If (f) is not NULL, the row is printed: speed in KiB units (blank when it is 0), |
| // followed by the usage / rating columns. |
| // If (res) is not NULL, the row multiplied by (weight) is added to (*res). |
| static void PrintResults(IBenchPrintCallback *f, |
|     const CBenchInfo &info, |
|     unsigned weight, |
|     UInt64 rating, |
|     bool showFreq, UInt64 cpuFreq, |
|     CTotalBenchRes *res) |
| { |
|   CTotalBenchRes t; |
|   t.Rating = rating; |
|   t.NumIterations2 = 1; |
|   t.Generate_From_BenchInfo(info); |
| |
|   if (f) |
|   { |
|     IBenchPrintCallback &out = *f; |
|     if (t.Speed == 0) |
|       PrintSpaces(out, 1 + kFieldSize_Speed); |
|     else |
|       PrintNumber(out, t.Speed / 1024, kFieldSize_Speed); |
|     PrintResults(out, t.Usage, t.RPU, rating, showFreq, cpuFreq); |
|   } |
| |
|   if (!res) |
|     return; |
|   t.Mult_For_Weight(weight); |
|   res->Update_With_Res(t); |
| } |
| |
| // Prints the averages of the values accumulated in (res): |
| // every sum is divided by res.NumIterations2 (a zero counter is treated as 1). |
| // The speed column is blank unless (showSpeed) is set and the average speed is not 0. |
| static void PrintTotals(IBenchPrintCallback &f, |
|     bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res) |
| { |
|   UInt64 divider = res.NumIterations2; |
|   if (divider == 0) |
|     divider = 1; |
| |
|   const UInt64 avgSpeed = res.Speed / divider; |
|   if (!showSpeed || avgSpeed == 0) |
|     PrintSpaces(f, 1 + kFieldSize_Speed); |
|   else |
|     PrintNumber(f, avgSpeed / 1024, kFieldSize_Speed); |
| |
|   PrintResults(f, res.Usage / divider, res.RPU / divider, res.Rating / divider, showFreq, cpuFreq); |
| } |
| |
| |
| // Appends the text produced by ConvertUInt64ToHex() for (v) to (s). |
| static void PrintHex(AString &s, UInt64 v) |
| { |
|   char hex[32]; |
|   ConvertUInt64ToHex(v, hex); |
|   s += hex; |
| } |
| |
| // Returns a text suffix that describes the affinity of the process. |
| // The result is empty when the process can use all system threads. |
| // Otherwise it is " / <system threads> : " followed by |
| //   Windows: the process and system affinity masks in hex, |
| //   other systems: one hex digit per group of 4 CPUs, highest group first. |
| AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) |
| { |
|   AString s; |
|   const unsigned numSysThreads = ti.GetNumSystemThreads(); |
|   if (ti.GetNumProcessThreads() == numSysThreads) |
|     return s; |
| |
|   s += " / "; |
|   s.Add_UInt32(numSysThreads); |
|   s += " : "; |
| |
|   #ifdef _WIN32 |
|   PrintHex(s, ti.processAffinityMask); |
|   s += " / "; |
|   PrintHex(s, ti.systemAffinityMask); |
|   #else |
|   // round the thread count up to a whole number of 4-CPU groups (at least one group) |
|   unsigned pos = (numSysThreads + 3) & ~(unsigned)3; |
|   if (pos == 0) |
|     pos = 4; |
|   while (pos >= 4) |
|   { |
|     pos -= 4; |
|     unsigned digit = 0; |
|     for (unsigned bitIndex = 0; bitIndex < 4; bitIndex++) |
|       if (ti.IsCpuSet(pos + bitIndex)) |
|         digit |= ((unsigned)1 << bitIndex); |
|     PrintHex(s, digit); |
|   } |
|   #endif |
| |
|   return s; |
| } |
| |
| |
| #ifdef Z7_LARGE_PAGES |
| |
| #ifdef _WIN32 |
| extern bool g_LargePagesMode; |
| extern "C" |
| { |
| extern SIZE_T g_LargePageSize; |
| } |
| #endif |
| |
| // Appends a "(LP-...)" note about large pages to (s). |
| // Windows: nothing is added when large pages mode is off and the large page size is 0; |
| //   "-1G" is added if CPU_IsSupported_PageGB() reports support (x86 / x64 builds only), |
| //   "-NA" is added if large pages mode is off. |
| // Other systems: (s) gets only an empty string appended. |
| void Add_LargePages_String(AString &s) |
| { |
|   #ifdef _WIN32 |
|   if (!g_LargePagesMode && g_LargePageSize == 0) |
|     return; |
|   s.Add_OptSpaced("(LP-"); |
|   PrintSize_KMGT_Or_Hex(s, g_LargePageSize); |
|   #ifdef MY_CPU_X86_OR_AMD64 |
|   if (CPU_IsSupported_PageGB()) |
|     s += "-1G"; |
|   #endif |
|   if (!g_LargePagesMode) |
|     s += "-NA"; |
|   s += ")"; |
|   #else |
|   s += ""; |
|   #endif |
| } |
| |
| #endif |
| |
| |
| |
| // Prints one line fragment of the form |
| //   RAM <sizeString><size in MB or ?> MB<large pages note>, # <threadsString><numThreads> |
| // The large pages note is added only in builds with Z7_LARGE_PAGES. |
| static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString, |
|     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads) |
| { |
|   f.Print("RAM "); |
|   f.Print(sizeString); |
|   if (!size_Defined) |
|     f.Print(" ?"); |
|   else |
|     PrintNumber(f, (size >> 20), 6); |
|   f.Print(" MB"); |
| |
|   #ifdef Z7_LARGE_PAGES |
|   { |
|     AString largePagesNote; |
|     Add_LargePages_String(largePagesNote); |
|     f.Print(largePagesNote); |
|   } |
|   #endif |
| |
|   f.Print(", # "); |
|   f.Print(threadsString); |
|   PrintNumber(f, numThreads, 3); |
| } |
| |
| |
| |
| // IBenchCallback implementation that prints encode / decode results |
| // to (_file) and accumulates weighted totals in EncodeRes / DecodeRes. |
| struct CBenchCallbackToPrint Z7_final: public IBenchCallback |
| { |
|   bool NeedPrint;         // results are printed and accumulated only when this is set |
|   bool Use2Columns;       // encode and decode results share one line, separated by kSep |
|   bool ShowFreq;          // passed to PrintResults() as showFreq |
|   unsigned NameFieldSize; // indent printed before decode results when Use2Columns is not set |
| |
|   unsigned EncodeWeight;  // weight of the current method for EncodeRes |
|   unsigned DecodeWeight;  // weight of the current method for DecodeRes |
| |
|   UInt64 CpuFreq;         // passed to PrintResults() as cpuFreq |
|   UInt64 DictSize;        // passed to BenchProps.GetRating_Enc() |
| |
|   IBenchPrintCallback *_file; // output; must be set by the owner before results arrive |
|   CBenchProps BenchProps; |
|   CTotalBenchRes EncodeRes; |
|   CTotalBenchRes DecodeRes; |
| |
|   CBenchInfo BenchInfo_Results[2]; // [0]: last final encode info, [1]: last final decode info |
| |
|   // DictSize and _file are initialized too, so that a missed assignment |
|   // in the owner gives a defined value instead of indeterminate memory. |
|   CBenchCallbackToPrint(): |
|       NeedPrint(true), |
|       Use2Columns(false), |
|       ShowFreq(false), |
|       NameFieldSize(0), |
|       EncodeWeight(1), |
|       DecodeWeight(1), |
|       CpuFreq(0), |
|       DictSize(0), |
|       _file(NULL) |
|     {} |
| |
|   void Init() { EncodeRes.Init(); DecodeRes.Init(); } |
|   void Print(const char *s); |
|   void NewLine(); |
| |
|   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq); |
|   HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override; |
|   HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override; |
| }; |
| |
| // Stores the frequency settings used for the percent columns; always returns S_OK. |
| HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq) |
| { |
|   CpuFreq = cpuFreq; |
|   ShowFreq = showFreq; |
|   return S_OK; |
| } |
| |
| // Receives an encode result. Only a (final) result is stored in BenchInfo_Results[0]; |
| // it is printed and added to EncodeRes when NeedPrint is set. |
| // Returns the error of _file->CheckBreak(), otherwise S_OK. |
| HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final) |
| { |
|   RINOK(_file->CheckBreak()) |
|   if (!final) |
|     return S_OK; |
| |
|   BenchInfo_Results[0] = info; |
|   if (!NeedPrint) |
|     return S_OK; |
| |
|   const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations); |
|   PrintResults(_file, info, EncodeWeight, rating, ShowFreq, CpuFreq, &EncodeRes); |
|   // in one-column mode the decode result goes to its own line |
|   if (!Use2Columns) |
|     _file->NewLine(); |
|   return S_OK; |
| } |
| |
| static const char * const kSep = " | "; /* printed by SetDecodeResult() between the encode and decode columns when Use2Columns is set */ |
| |
| // Receives a decode result. Only a (final) result is stored in BenchInfo_Results[1]; |
| // it is printed and added to DecodeRes when NeedPrint is set. |
| // Returns the error of _file->CheckBreak(), otherwise S_OK. |
| HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final) |
| { |
|   RINOK(_file->CheckBreak()) |
|   if (!final) |
|     return S_OK; |
| |
|   BenchInfo_Results[1] = info; |
|   if (!NeedPrint) |
|     return S_OK; |
| |
|   const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations); |
|   if (Use2Columns) |
|     _file->Print(kSep); |
|   else |
|     PrintSpaces(*_file, NameFieldSize); |
| |
|   // fold the iteration count into the sizes, so that the printed row uses total sizes |
|   CBenchInfo total = info; |
|   total.UnpackSize *= total.NumIterations; |
|   total.PackSize *= total.NumIterations; |
|   total.NumIterations = 1; |
|   PrintResults(_file, total, DecodeWeight, rating, ShowFreq, CpuFreq, &DecodeRes); |
|   return S_OK; |
| } |
| |
| // Forwards (s) to the output callback. |
| void CBenchCallbackToPrint::Print(const char *s) |
| { |
|   IBenchPrintCallback *out = _file; |
|   out->Print(s); |
| } |
| |
| // Forwards a line break to the output callback. |
| void CBenchCallbackToPrint::NewLine() |
| { |
|   IBenchPrintCallback *out = _file; |
|   out->NewLine(); |
| } |
| |
| // Prints (s) left-aligned: spaces are added after it up to (size) columns. |
| // A string longer than the field is printed as is. |
| static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size) |
| { |
|   const int pad = (int)size - (int)MyStringLen(s); |
|   f.Print(s); |
|   if (pad > 0) |
|     PrintSpaces(f, (unsigned)pad); |
| } |
| |
| // Prints (s) right-aligned: spaces are added before it up to (size) columns. |
| // A string longer than the field is printed as is. |
| static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size) |
| { |
|   const int pad = (int)size - (int)MyStringLen(s); |
|   if (pad > 0) |
|     PrintSpaces(f, (unsigned)pad); |
|   f.Print(s); |
| } |
| |
| |
| // Wildcard match of (name) against (mask) after both were |
| // converted to Unicode strings and lower-cased with MakeLower_Ascii(). |
| static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name) |
| { |
|   UString loweredMask = GetUnicodeString(mask); |
|   loweredMask.MakeLower_Ascii(); |
| |
|   UString loweredName = GetUnicodeString(name); |
|   loweredName.MakeLower_Ascii(); |
| |
|   return DoesWildcardMatchName(loweredMask, loweredName); |
| } |
| |
| |
| // Runs MethodBench() for every g_Bench row whose name matches methodMask.MethodName. |
| // For each row the name is printed, the row's complexities and weights are copied |
| // to (callback), and the results are printed through (callback). |
| // A method that reports E_NOTIMPL is skipped; any other error stops the loop |
| // and is returned. |
| static HRESULT TotalBench( |
|     DECL_EXTERNAL_CODECS_LOC_VARS |
|     const COneMethodInfo &methodMask, |
|     UInt64 complexInCommands, |
|     #ifndef Z7_ST |
|     UInt32 numThreads, |
|     const CAffinityMode *affinityMode, |
|     #endif |
|     bool forceUnpackSize, |
|     size_t unpackSize, |
|     const Byte *fileData, |
|     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback) |
| { |
|   const unsigned numMethods = Z7_ARRAY_SIZE(g_Bench); |
|   for (unsigned index = 0; index < numMethods; index++) |
|   { |
|     const CBenchMethod &bm = g_Bench[index]; |
|     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bm.Name)) |
|       continue; |
|     PrintLeft(*callback->_file, bm.Name, kFieldSize_Name); |
| |
|     // the key size follows the AES name prefix; 32 is used for all other names |
|     if (IsString1PrefixedByString2(bm.Name, "AES128")) |
|       callback->BenchProps.KeySize = 16; |
|     else if (IsString1PrefixedByString2(bm.Name, "AES192")) |
|       callback->BenchProps.KeySize = 24; |
|     else |
|       callback->BenchProps.KeySize = 32; |
| |
|     callback->BenchProps.EncComplex = bm.EncComplex; |
|     callback->BenchProps.DecComplexCompr = bm.DecComplexCompr; |
|     callback->BenchProps.DecComplexUnc = bm.DecComplexUnc; |
| |
|     COneMethodInfo method; |
|     NCOM::CPropVariant nameProp; |
|     nameProp = bm.Name; |
|     RINOK(method.ParseMethodFromPROPVARIANT(UString(), nameProp)) |
| |
|     // rows without a dictionary use the filter size unless the size was forced |
|     size_t curUnpackSize = unpackSize; |
|     if (bm.DictBits == 0 && !forceUnpackSize) |
|       curUnpackSize = kFilterUnpackSize; |
| |
|     callback->EncodeWeight = bm.Weight; |
|     callback->DecodeWeight = bm.Weight; |
| |
|     const HRESULT res = MethodBench( |
|         EXTERNAL_CODECS_LOC_VARS |
|         complexInCommands, |
|         #ifndef Z7_ST |
|         false, numThreads, affinityMode, |
|         #endif |
|         method, |
|         curUnpackSize, fileData, |
|         bm.DictBits, |
|         printCallback, callback, &callback->BenchProps); |
| |
|     if (res != E_NOTIMPL) |
|     { |
|       RINOK(res) |
|     } |
|     else if (!callback->Use2Columns) |
|     { |
|       // one-column mode: keep the empty line that the decode results would have used |
|       callback->NewLine(); |
|     } |
| |
|     callback->NewLine(); |
|   } |
|   return S_OK; |
| } |
| |
| |
| // Parameters and results of one CPU frequency measurement done by FreqBench(). |
| struct CFreqBench |
| { |
|   // in: |
|   UInt64 complexInCommands; // work budget; FreqBench() divides it by kNumFreqCommands |
|   UInt32 numThreads;        // 0 is treated as 1 by FreqBench() |
|   bool showFreq;            // passed to PrintResults() |
|   UInt64 specifiedFreq;     // if not 0, used for printing instead of the measured value |
| |
|   // out: |
|   UInt64 CpuFreqRes;        // measured rating divided by the number of threads |
|   UInt64 UsageRes;          // value of CBenchInfo::GetUsage() for the run |
|   UInt32 res;               // sum returned by the single-thread loop |
| |
|   // All fields get defined values, so that a caller that misses an input field |
|   // does not make FreqBench() read indeterminate memory. |
|   CFreqBench(): |
|       complexInCommands(0), |
|       numThreads(1), |
|       showFreq(false), |
|       specifiedFreq(0), |
|       CpuFreqRes(0), |
|       UsageRes(0), |
|       res(0) |
|     {} |
| |
|   HRESULT FreqBench(IBenchPrintCallback *_file |
|       #ifndef Z7_ST |
|       , const CAffinityMode *affinityMode |
|       #endif |
|       ); |
| }; |
| |
| |
| HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file /* Runs the CountCpuFreq test on (numThreads) threads and stores the results in CpuFreqRes and UsageRes; (_file) may be NULL, it is checked before each use in this function */ |
| #ifndef Z7_ST |
| , const CAffinityMode *affinityMode |
| #endif |
| ) |
| { |
| res = 0; /* reset the output fields */ |
| CpuFreqRes = 0; |
| UsageRes = 0; |
| |
| if (numThreads == 0) /* 0 is treated as one thread */ |
| numThreads = 1; |
| |
| #ifdef Z7_ST |
| numThreads = 1; |
| #endif |
| |
| const UInt32 complexity = kNumFreqCommands; |
| UInt64 numIterations = complexInCommands / complexity; /* total number of steps requested per thread */ |
| UInt32 numIterations2 = 1 << 30; /* upper limit for the step count of one call */ |
| if (numIterations > numIterations2) |
| numIterations /= numIterations2; /* several calls of 2^30 steps each */ |
| else |
| { |
| numIterations2 = (UInt32)numIterations; /* a single call that does all the steps */ |
| numIterations = 1; |
| } |
| |
| CBenchInfoCalc progressInfoSpec; |
| |
| #ifndef Z7_ST |
| |
| bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity(); /* the thread path is also taken for one thread when affinity is requested */ |
| |
| if (mtMode) |
| { |
| CFreqThreads threads; |
| threads.Items = new CFreqInfo[numThreads]; |
| UInt32 i; |
| for (i = 0; i < numThreads; i++) |
| { |
| CFreqInfo &info = threads.Items[i]; |
| info.Callback = _file; |
| info.CallbackRes = S_OK; |
| info.NumIterations = numIterations; |
| info.Size = numIterations2; |
| } |
| progressInfoSpec.SetStartTime(); /* the measured time includes thread creation */ |
| for (i = 0; i < numThreads; i++) |
| { |
| // Sleep(10); |
| CFreqInfo &info = threads.Items[i]; |
| WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i); |
| if (info.Thread.IsCreated()) |
| threads.NumThreads++; /* a created thread is counted even when wres reports an error */ |
| if (wres != 0) |
| return HRESULT_FROM_WIN32(wres); |
| } |
| WRes wres = threads.WaitAll(); |
| if (wres != 0) |
| return HRESULT_FROM_WIN32(wres); |
| for (i = 0; i < numThreads; i++) |
| { |
| RINOK(threads.Items[i].CallbackRes) |
| } |
| } |
| else |
| #endif |
| { |
| progressInfoSpec.SetStartTime(); |
| UInt32 sum = g_BenchCpuFreqTemp; |
| for (UInt64 k = numIterations; k > 0; k--) |
| { |
| sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp); |
| if (_file) |
| { |
| RINOK(_file->CheckBreak()) |
| } |
| } |
| res += sum; |
| } |
| |
| if (res == 0x12345678) /* NOTE(review): presumably only a use of (res) that keeps the computed sum observable - confirm */ |
| if (_file) |
| { |
| RINOK(_file->CheckBreak()) |
| } |
| |
| CBenchInfo info; |
| progressInfoSpec.SetFinishTime(info); |
| |
| info.UnpackSize = 0; /* this test processes no data, so the sizes are zero */ |
| info.PackSize = 0; |
| info.NumIterations = 1; |
| |
| const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity; /* calls * steps per call * threads * kNumFreqCommands */ |
| const UInt64 rating = info.GetSpeed(numCommands); |
| CpuFreqRes = rating / numThreads; /* per-thread value */ |
| UsageRes = info.GetUsage(); |
| |
| if (_file) |
| { |
| PrintResults(_file, info, |
| 0, // weight |
| rating, |
| showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL); /* a non-zero specifiedFreq replaces the measured CpuFreqRes */ |
| RINOK(_file->CheckBreak()) |
| } |
| |
| return S_OK; |
| } |
| |
| |
| |
| // Benchmarks one hash method. |
| // |
| //   complexInCommands : work budget; the iteration count is |
| //                       complexInCommands * k_Hash_Complex_Mult / complexity / bufferSize (at least 1) |
| //   numThreads        : number of worker threads; 0 is treated as 1 (always 1 in Z7_ST builds) |
| //   bufferSize        : size of the data hashed in each iteration |
| //   fileData          : data to hash; passed as is to the workers (may be NULL) |
| //   speed, usage      : outputs: processed bytes per time unit and CPU usage |
| //   complexity        : cost of the method, scaled by k_Hash_Complex_Mult |
| //   benchWeight       : weight of the result for (encodeRes) |
| //   checkSum          : optional expected checksum (may be NULL) |
| //   method            : hash method; its properties are applied to every hasher that supports them |
| //   _file             : output; if NULL, nothing is printed |
| //   showRating        : print the rating columns and update (encodeRes) |
| // |
| // Returns E_NOTIMPL if the hash method is unknown or no hasher can be created, |
| // S_FALSE if the threads produced different checksums, otherwise the first error or S_OK. |
| static HRESULT CrcBench( |
|     DECL_EXTERNAL_CODECS_LOC_VARS |
|     UInt64 complexInCommands, |
|     UInt32 numThreads, |
|     const size_t bufferSize, |
|     const Byte *fileData, |
| |
|     UInt64 &speed, |
|     UInt64 &usage, |
| |
|     UInt32 complexity, unsigned benchWeight, |
|     const UInt32 *checkSum, |
|     const COneMethodInfo &method, |
|     IBenchPrintCallback *_file, |
|     #ifndef Z7_ST |
|     const CAffinityMode *affinityMode, |
|     #endif |
|     bool showRating, |
|     CTotalBenchRes *encodeRes, |
|     bool showFreq, UInt64 cpuFreq) |
| { |
|   if (numThreads == 0) |
|     numThreads = 1; |
| |
|   #ifdef Z7_ST |
|   numThreads = 1; |
|   #endif |
| |
|   const AString &methodName = method.MethodName; |
|   // methodName.RemoveChar(L'-'); |
|   CMethodId hashID; |
|   if (!FindHashMethod( |
|       EXTERNAL_CODECS_LOC_VARS |
|       methodName, hashID)) |
|     return E_NOTIMPL; |
| |
|   /* |
|   // if will generate random data in each thread, instead of global data |
|   CMidAlignedBuffer buffer; |
|   if (!fileData) |
|   { |
|     ALLOC_WITH_HRESULT(&buffer, bufferSize) |
|     RandGen(buffer, bufferSize); |
|     fileData = buffer; |
|   } |
|   */ |
| |
|   // an empty buffer is counted as 1 byte, so that the division below is defined |
|   const size_t bsize = (bufferSize == 0 ? 1 : bufferSize); |
|   UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize; |
|   if (numIterations == 0) |
|     numIterations = 1; |
| |
|   CBenchInfoCalc progressInfoSpec; |
|   CBenchInfo info; |
| |
|   #ifndef Z7_ST |
|   // the thread path is also taken for one thread when affinity is requested |
|   bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity(); |
| |
|   if (mtEncMode) |
|   { |
|     CCrcThreads threads; |
|     threads.Items = new CCrcInfo[numThreads]; |
|     { |
|       WRes wres = threads.Common.StartEvent.Create(); |
|       if (wres != 0) |
|         return HRESULT_FROM_WIN32(wres); |
|       threads.NeedClose = true; |
|     } |
| |
|     UInt32 i; |
|     // 1) create and configure one hasher per thread |
|     for (i = 0; i < numThreads; i++) |
|     { |
|       CCrcInfo &ci = threads.Items[i]; |
|       AString name; |
|       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher)) |
|       if (!ci.Hasher) |
|         return E_NOTIMPL; |
|       CMyComPtr<ICompressSetCoderProperties> scp; |
|       ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp); |
|       if (scp) |
|       { |
|         RINOK(method.SetCoderProps(scp)) |
|       } |
| |
|       ci.Callback = _file; |
|       ci.Data = fileData; |
|       ci.NumIterations = numIterations; |
|       ci.Size = bufferSize; |
|       ci.CheckSumDefined = false; |
|       if (checkSum) |
|       { |
|         ci.CheckSum = *checkSum; |
|         ci.CheckSumDefined = true; |
|       } |
| |
|       #ifdef USE_ALLOCA |
|       // a different value (below 0x800) for each thread |
|       ci.AllocaSize = (i * 16 * 21) & 0x7FF; |
|       #endif |
|     } |
| |
|     // 2) create the threads; a created thread is counted even if an error is reported |
|     for (i = 0; i < numThreads; i++) |
|     { |
|       CCrcInfo &ci = threads.Items[i]; |
|       ci.ThreadIndex = i; |
|       ci.Common = &threads.Common; |
|       ci.AffinityMode = *affinityMode; |
|       HRESULT hres = ci.CreateThread(); |
|       if (ci.Thread.IsCreated()) |
|         threads.NumThreads++; |
|       if (hres != 0) |
|         return hres; |
|     } |
| |
|     // 3) wait until every thread has signaled ReadyEvent, so the timer covers the hashing only |
|     for (i = 0; i < numThreads; i++) |
|     { |
|       CCrcInfo &ci = threads.Items[i]; |
|       WRes wres = ci.ReadyEvent.Lock(); |
|       if (wres != 0) |
|         return HRESULT_FROM_WIN32(wres); |
|       RINOK(ci.Res) |
|     } |
| |
|     progressInfoSpec.SetStartTime(); |
| |
|     WRes wres = threads.StartAndWait(); |
|     if (wres != 0) |
|       return HRESULT_FROM_WIN32(wres); |
| |
|     progressInfoSpec.SetFinishTime(info); |
| |
|     // 4) all threads hash the same data, so their checksums must be equal |
|     for (i = 0; i < numThreads; i++) |
|     { |
|       RINOK(threads.Items[i].Res) |
|       if (i != 0) |
|         if (threads.Items[i].CheckSum_Res != |
|             threads.Items[i - 1].CheckSum_Res) |
|           return S_FALSE; |
|     } |
|   } |
|   else |
|   #endif |
|   { |
|     CMyComPtr<IHasher> hasher; |
|     AString name; |
|     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher)) |
|     if (!hasher) |
|       return E_NOTIMPL; |
|     CMyComPtr<ICompressSetCoderProperties> scp; |
|     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp); |
|     if (scp) |
|     { |
|       RINOK(method.SetCoderProps(scp)) |
|     } |
|     CCrcInfo_Base crcib; |
|     crcib.CreateLocalBuf = false; |
|     RINOK(crcib.Generate(fileData, bufferSize)) |
|     progressInfoSpec.SetStartTime(); |
|     RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file)) |
|     progressInfoSpec.SetFinishTime(info); |
|   } |
| |
| |
|   UInt64 unpSize = numIterations * bufferSize; |
|   UInt64 unpSizeThreads = unpSize * numThreads; |
|   info.UnpackSize = unpSizeThreads; |
|   info.PackSize = unpSizeThreads; |
|   info.NumIterations = 1; |
| |
|   if (_file) |
|   { |
|     if (showRating) |
|     { |
|       // an empty buffer is counted as 1 byte per iteration, as in the iteration count above |
|       UInt64 unpSizeThreads2 = unpSizeThreads; |
|       if (unpSizeThreads2 == 0) |
|         unpSizeThreads2 = numIterations * 1 * numThreads; |
|       // (complexity) is scaled by k_Hash_Complex_Mult: use the same constant |
|       // that was used for the iteration count, so both scales always agree |
|       const UInt64 numCommands = unpSizeThreads2 * complexity / k_Hash_Complex_Mult; |
|       const UInt64 rating = info.GetSpeed(numCommands); |
|       PrintResults(_file, info, |
|           benchWeight, rating, |
|           showFreq, cpuFreq, encodeRes); |
|     } |
|     RINOK(_file->CheckBreak()) |
|   } |
| |
|   speed = info.GetSpeed(unpSizeThreads); |
|   usage = info.GetUsage(); |
| |
|   return S_OK; |
| } |
| |
| |
| |
| // Runs CrcBench() for every g_Hash row whose name matches methodMask.MethodName. |
| // The name of each row is printed through (callback), the results through (printCallback). |
| // A method that reports E_NOTIMPL is skipped; any other error stops the loop |
| // and is returned. |
| static HRESULT TotalBench_Hash( |
|     DECL_EXTERNAL_CODECS_LOC_VARS |
|     const COneMethodInfo &methodMask, |
|     UInt64 complexInCommands, |
|     UInt32 numThreads, |
|     size_t bufSize, |
|     const Byte *fileData, |
|     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback, |
|     #ifndef Z7_ST |
|     const CAffinityMode *affinityMode, |
|     #endif |
|     CTotalBenchRes *encodeRes, |
|     bool showFreq, UInt64 cpuFreq) |
| { |
|   const unsigned numHashes = Z7_ARRAY_SIZE(g_Hash); |
|   for (unsigned index = 0; index < numHashes; index++) |
|   { |
|     const CBenchHash &bh = g_Hash[index]; |
|     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bh.Name)) |
|       continue; |
|     PrintLeft(*callback->_file, bh.Name, kFieldSize_Name); |
| |
|     COneMethodInfo method; |
|     NCOM::CPropVariant nameProp; |
|     nameProp = bh.Name; |
|     RINOK(method.ParseMethodFromPROPVARIANT(UString(), nameProp)) |
| |
|     // the reference checksum is checked only when no file data is used |
|     // and the buffer has the size (1 << kNumHashDictBits) |
|     const UInt32 *expectedCheckSum = NULL; |
|     if (!fileData && bufSize == (1 << kNumHashDictBits)) |
|       expectedCheckSum = &bh.CheckSum; |
| |
|     UInt64 speed, usage; |
| |
|     const HRESULT res = CrcBench( |
|         EXTERNAL_CODECS_LOC_VARS |
|         complexInCommands, |
|         numThreads, bufSize, fileData, |
|         speed, usage, |
|         bh.Complex, bh.Weight, |
|         expectedCheckSum, |
|         method, |
|         printCallback, |
|         #ifndef Z7_ST |
|         affinityMode, |
|         #endif |
|         true, // showRating |
|         encodeRes, showFreq, cpuFreq); |
| |
|     // an unsupported hash method leaves its line without results |
|     if (res != E_NOTIMPL) |
|     { |
|       RINOK(res) |
|     } |
|     callback->NewLine(); |
|   } |
|   return S_OK; |
| } |
| |
| // Owner of a heap array of UInt64 values; the array is released in the destructor. |
| struct CTempValues |
| { |
|   UInt64 *Values; |
|   CTempValues(): Values(NULL) {} |
|   // Allocates (num) items. An array from a previous call is released first, |
|   // so that repeated calls do not leak. Values is reset before the new |
|   // allocation, so a throwing new[] cannot leave a dangling pointer |
|   // for the destructor. |
|   void Alloc(UInt32 num) |
|   { |
|     delete []Values; |
|     Values = NULL; |
|     Values = new UInt64[num]; |
|   } |
|   ~CTempValues() { delete []Values; } |
| }; |
| |
| // Stores (s) in (prop): |
| //   as UInt32, if the whole string is a number that fits in 32 bits, |
| //   as UInt64, if the whole string is a larger number, |
| //   as string, if (s) is empty or has characters after the number. |
| static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop) |
| { |
|   const wchar_t *end; |
|   const UInt64 number = ConvertStringToUInt64(s, &end); |
|   const bool isWholeNumber = (!s.IsEmpty() && *end == 0); |
|   if (!isWholeNumber) |
|     prop = s; |
|   else if (number > (UInt32)0xFFFFFFFF) |
|     prop = number; |
|   else |
|     prop = (UInt32)number; |
| } |
| |
| |
| // Compares two method names with StringsAreEqualNoCase_Ascii(). |
| static bool AreSameMethodNames(const char *fullName, const char *shortName) |
| { |
|   const bool same = StringsAreEqualNoCase_Ascii(fullName, shortName); |
|   return same; |
| } |
| |
| |
| |
| |
| // Prints the memory usage and the number of benchmark threads with PrintRequirements(). |
| static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads) |
| { |
|   const bool sizeIsDefined = true; |
|   PrintRequirements(f, "usage:", sizeIsDefined, usage, "Benchmark threads: ", threads); |
| } |
| |
| |
| // Prints the column delimiter. |
| static void Print_Delimiter(IBenchPrintCallback &f) |
| { |
|   const char * const delimiter = " |"; |
|   f.Print(delimiter); |
| } |
| |
| // Prints (pow) followed by ':' as a left-aligned label of kFieldSize_SmallName columns. |
| static void Print_Pow(IBenchPrintCallback &f, unsigned pow) |
| { |
|   char text[16]; |
|   ConvertUInt32ToString(pow, text); |
|   char *tail = text + MyStringLen(text); |
|   tail[0] = ':'; |
|   tail[1] = 0; |
|   PrintLeft(f, text, kFieldSize_SmallName); // 4 |
| } |
| |
| // Prints the usage column and then (speed) divided by one million (rounded down). |
| static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f, |
|     UInt64 usage, UInt64 speed) |
| { |
|   const UInt64 kMillion = 1000000; |
|   PrintUsage(f, usage, kFieldSize_Usage); |
|   PrintNumber(f, speed / kMillion, kFieldSize_CrcSpeed); |
| } |
| |
| |
| HRESULT Bench( |
| DECL_EXTERNAL_CODECS_LOC_VARS |
| IBenchPrintCallback *printCallback, |
| IBenchCallback *benchCallback, |
| const CObjectVector<CProperty> &props, |
| UInt32 numIterations, |
| bool multiDict, |
| IBenchFreqCallback *freqCallback) |
| { |
| if (!CrcInternalTest()) |
| return E_FAIL; |
| |
| UInt32 numCPUs = 1; |
| UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29; |
| |
| NSystem::CProcessAffinity threadsInfo; |
| threadsInfo.InitST(); |
| |
| #ifndef Z7_ST |
| |
| if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0) |
| numCPUs = threadsInfo.GetNumProcessThreads(); |
| else |
| numCPUs = NSystem::GetNumberOfProcessors(); |
| |
| #endif |
| |
| // numCPUs = 24; |
| /* |
| { |
| DWORD_PTR mask = (1 << 0); |
| DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask); |
| old = old; |
| DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask); |
| old2 = old2; |
| return 0; |
| } |
| */ |
| |
| bool ramSize_Defined = NSystem::GetRamSize(ramSize); |
| |
| UInt32 numThreadsSpecified = numCPUs; |
| bool needSetComplexity = false; |
| UInt32 testTimeMs = kComplexInMs; |
| UInt32 startDicLog = 22; |
| bool startDicLog_Defined = false; |
| UInt64 specifiedFreq = 0; |
| bool multiThreadTests = false; |
| UInt64 complexInCommands = kComplexInCommands; |
| UInt32 numThreads_Start = 1; |
| |
| #ifndef Z7_ST |
| CAffinityMode affinityMode; |
| #endif |
| |
| |
| COneMethodInfo method; |
| |
| CMidAlignedBuffer fileDataBuffer; |
| bool use_fileData = false; |
| bool isFixedDict = false; |
| |
| { |
| unsigned i; |
| |
| if (printCallback) |
| { |
| for (i = 0; i < props.Size(); i++) |
| { |
| const CProperty &property = props[i]; |
| printCallback->Print(" "); |
| printCallback->Print(GetAnsiString(property.Name)); |
| if (!property.Value.IsEmpty()) |
| { |
| printCallback->Print("="); |
| printCallback->Print(GetAnsiString(property.Value)); |
| } |
| } |
| if (!props.IsEmpty()) |
| printCallback->NewLine(); |
| } |
| |
| |
| for (i = 0; i < props.Size(); i++) |
| { |
| const CProperty &property = props[i]; |
| UString name (property.Name); |
| name.MakeLower_Ascii(); |
| |
| if (name.IsEqualTo("file")) |
| { |
| if (property.Value.IsEmpty()) |
| return E_INVALIDARG; |
| |
| NFile::NIO::CInFile file; |
| if (!file.Open(us2fs(property.Value))) |
| return GetLastError_noZero_HRESULT(); |
| size_t len; |
| { |
| UInt64 len64; |
| if (!file.GetLength(len64)) |
| return GetLastError_noZero_HRESULT(); |
| if (printCallback) |
| { |
| printCallback->Print("file size ="); |
| PrintNumber(*printCallback, len64, 0); |
| printCallback->NewLine(); |
| } |
| len = (size_t)len64; |
| if (len != len64) |
| return E_INVALIDARG; |
| } |
| |
| // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here |
| |
| ALLOC_WITH_HRESULT(&fileDataBuffer, len) |
| use_fileData = true; |
| |
| { |
| size_t processed; |
| if (!file.ReadFull((Byte *)fileDataBuffer, len, processed)) |
| return GetLastError_noZero_HRESULT(); |
| if (processed != len) |
| return E_FAIL; |
| } |
| continue; |
| } |
| |
| NCOM::CPropVariant propVariant; |
| if (!property.Value.IsEmpty()) |
| ParseNumberString(property.Value, propVariant); |
| |
| if (name.IsEqualTo("time")) |
| { |
| RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) |
| needSetComplexity = true; |
| testTimeMs *= 1000; |
| continue; |
| } |
| |
| if (name.IsEqualTo("timems")) |
| { |
| RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) |
| needSetComplexity = true; |
| continue; |
| } |
| |
| if (name.IsEqualTo("tic")) |
| { |
| UInt32 v; |
| RINOK(ParsePropToUInt32(UString(), propVariant, v)) |
| if (v >= 64) |
| return E_INVALIDARG; |
| complexInCommands = (UInt64)1 << v; |
| continue; |
| } |
| |
| const bool isCurrent_fixedDict = name.IsEqualTo("df"); |
| if (isCurrent_fixedDict) |
| isFixedDict = true; |
| if (isCurrent_fixedDict || name.IsEqualTo("ds")) |
| { |
| RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog)) |
| if (startDicLog > 32) |
| return E_INVALIDARG; |
| startDicLog_Defined = true; |
| continue; |
| } |
| |
| if (name.IsEqualTo("mts")) |
| { |
| RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start)) |
| continue; |
| } |
| |
| if (name.IsEqualTo("af")) |
| { |
| UInt32 bundle; |
| RINOK(ParsePropToUInt32(UString(), propVariant, bundle)) |
| if (bundle > 0 && bundle < numCPUs) |
| { |
| #ifndef Z7_ST |
| affinityMode.SetLevels(numCPUs, 2); |
| affinityMode.NumBundleThreads = bundle; |
| #endif |
| } |
| continue; |
| } |
| |
| if (name.IsEqualTo("freq")) |
| { |
| UInt32 freq32 = 0; |
| RINOK(ParsePropToUInt32(UString(), propVariant, freq32)) |
| if (freq32 == 0) |
| return E_INVALIDARG; |
| specifiedFreq = (UInt64)freq32 * 1000000; |
| |
| if (printCallback) |
| { |
| printCallback->Print("freq="); |
| PrintNumber(*printCallback, freq32, 0); |
| printCallback->NewLine(); |
| } |
| |
| continue; |
| } |
| |
| if (name.IsPrefixedBy_Ascii_NoCase("mt")) |
| { |
| const UString s = name.Ptr(2); |
| if (s.IsEqualTo("*") |
| || (s.IsEmpty() |
| && propVariant.vt == VT_BSTR |
| && StringsAreEqual_Ascii(propVariant.bstrVal, "*"))) |
| { |
| multiThreadTests = true; |
| continue; |
| } |
| #ifndef Z7_ST |
| RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified)) |
| #endif |
| continue; |
| } |
| |
| RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant)) |
| } |
| } |
| |
| if (printCallback) |
| { |
| AString s; |
| |
| #ifndef _WIN32 |
| s += "Compiler: "; |
| GetCompiler(s); |
| printCallback->Print(s); |
| printCallback->NewLine(); |
| s.Empty(); |
| #endif |
| |
| GetSystemInfoText(s); |
| printCallback->Print(s); |
| printCallback->NewLine(); |
| } |
| |
| if (printCallback) |
| { |
| printCallback->Print("1T CPU Freq (MHz):"); |
| } |
| |
| if (printCallback || freqCallback) |
| { |
| UInt64 numMilCommands = 1 << 6; |
| if (specifiedFreq != 0) |
| { |
| while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) |
| numMilCommands >>= 1; |
| } |
| |
| for (int jj = 0;; jj++) |
| { |
| if (printCallback) |
| RINOK(printCallback->CheckBreak()) |
| |
| UInt64 start = ::GetTimeCount(); |
| UInt32 sum = (UInt32)start; |
| sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp); |
| if (sum == 0xF1541213) |
| if (printCallback) |
| printCallback->Print(""); |
| const UInt64 realDelta = ::GetTimeCount() - start; |
| start = realDelta; |
| if (start == 0) |
| start = 1; |
| if (start > (UInt64)1 << 61) |
| start = 1; |
| const UInt64 freq = GetFreq(); |
| // mips is constant in some compilers |
| const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start); |
| const UInt64 mipsVal = numMilCommands * freq / start; |
| if (printCallback) |
| { |
| if (realDelta == 0) |
| { |
| printCallback->Print(" -"); |
| } |
| else |
| { |
| // PrintNumber(*printCallback, start, 0); |
| PrintNumber(*printCallback, mipsVal, 5); |
| } |
| } |
| if (freqCallback) |
| { |
| RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult)) |
| } |
| |
| if (jj >= 1) |
| { |
| bool needStop = (numMilCommands >= (1 << |
| #ifdef _DEBUG |
| 7 |
| #else |
| 11 |
| #endif |
| )); |
| if (start >= freq * 16) |
| { |
| printCallback->Print(" (Cmplx)"); |
| if (!freqCallback) // we don't want complexity change for old gui lzma benchmark |
| { |
| needSetComplexity = true; |
| } |
| needStop = true; |
| } |
| if (needSetComplexity) |
| SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands); |
| if (needStop) |
| break; |
| numMilCommands <<= 1; |
| } |
| } |
| if (freqCallback) |
| { |
| RINOK(freqCallback->FreqsFinished(1)) |
| } |
| } |
| |
| if (numThreadsSpecified >= 2) |
| if (printCallback || freqCallback) |
| { |
| if (printCallback) |
| printCallback->NewLine(); |
| |
| /* it can show incorrect frequency for HT threads. |
| so we reduce freq test to (numCPUs / 2) */ |
| |
| UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified; |
| if (numThreads < 1) |
| numThreads = 1; |
| |
| if (printCallback) |
| { |
| char s[128]; |
| ConvertUInt64ToString(numThreads, s); |
| printCallback->Print(s); |
| printCallback->Print("T CPU Freq (MHz):"); |
| } |
| UInt64 numMilCommands = 1 << |
| #ifdef _DEBUG |
| 7; |
| #else |
| 10; |
| #endif |
| |
| if (specifiedFreq != 0) |
| { |
| while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) |
| numMilCommands >>= 1; |
| } |
| |
| // for (int jj = 0;; jj++) |
| for (;;) |
| { |
| if (printCallback) |
| RINOK(printCallback->CheckBreak()) |
| |
| { |
| // PrintLeft(f, "CPU", kFieldSize_Name); |
| |
| // UInt32 resVal; |
| |
| CFreqBench fb; |
| fb.complexInCommands = numMilCommands * 1000000; |
| fb.numThreads = numThreads; |
| // showFreq; |
| // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); |
| fb.showFreq = true; |
| fb.specifiedFreq = 1; |
| |
| const HRESULT res = fb.FreqBench(NULL /* printCallback */ |
| #ifndef Z7_ST |
| , &affinityMode |
| #endif |
| ); |
| RINOK(res) |
| |
| if (freqCallback) |
| { |
| RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes)) |
| } |
| |
| if (printCallback) |
| { |
| /* |
| if (realDelta == 0) |
| { |
| printCallback->Print(" -"); |
| } |
| else |
| */ |
| { |
| // PrintNumber(*printCallback, start, 0); |
| PrintUsage(*printCallback, fb.UsageRes, 3); |
| printCallback->Print("%"); |
| PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0); |
| printCallback->Print(" "); |
| |
| // PrintNumber(*printCallback, fb.UsageRes, 5); |
| } |
| } |
| } |
| // if (jj >= 1) |
| { |
| const bool needStop = (numMilCommands >= (1 << |
| #ifdef _DEBUG |
| 7 |
| #else |
| 11 |
| #endif |
| )); |
| if (needStop) |
| break; |
| numMilCommands <<= 1; |
| } |
| } |
| if (freqCallback) |
| { |
| RINOK(freqCallback->FreqsFinished(numThreads)) |
| } |
| } |
| |
| |
| if (printCallback) |
| { |
| printCallback->NewLine(); |
| printCallback->NewLine(); |
| PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs); |
| printCallback->Print(GetProcessThreadsInfo(threadsInfo)); |
| printCallback->NewLine(); |
| } |
| |
| if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax) |
| return E_INVALIDARG; |
| |
| UInt64 dict = (UInt64)1 << startDicLog; |
| const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict)); |
| |
| const unsigned level = method.GetLevel(); |
| |
| AString &methodName = method.MethodName; |
| const AString original_MethodName = methodName; |
| if (methodName.IsEmpty()) |
| methodName = "LZMA"; |
| |
| if (benchCallback) |
| { |
| CBenchProps benchProps; |
| benchProps.SetLzmaCompexity(); |
| const UInt64 dictSize = method.Get_Lzma_DicSize(); |
| |
| size_t uncompressedDataSize; |
| if (use_fileData) |
| { |
| uncompressedDataSize = fileDataBuffer.Size(); |
| } |
| else |
| { |
| uncompressedDataSize = kAdditionalSize + (size_t)dictSize; |
| if (uncompressedDataSize < dictSize) |
| return E_INVALIDARG; |
| } |
| |
| return MethodBench( |
| EXTERNAL_CODECS_LOC_VARS |
| complexInCommands, |
| #ifndef Z7_ST |
| true, numThreadsSpecified, |
| &affinityMode, |
| #endif |
| method, |
| uncompressedDataSize, (const Byte *)fileDataBuffer, |
| kOldLzmaDictBits, printCallback, benchCallback, &benchProps); |
| } |
| |
| if (methodName.IsEqualTo_Ascii_NoCase("CRC")) |
| methodName = "crc32"; |
| |
| CMethodId hashID; |
| const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID); |
| int codecIndex = -1; |
| bool isFilter = false; |
| if (!isHashMethod) |
| { |
| UInt32 numStreams; |
| codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName, |
| true, // encode |
| hashID, numStreams, isFilter); |
| // we can allow non filter for BW tests |
| if (!isFilter) codecIndex = -1; |
| } |
| |
| CBenchCallbackToPrint callback; |
| callback.Init(); |
| callback._file = printCallback; |
| |
| if (isHashMethod || codecIndex != -1) |
| { |
| if (!printCallback) |
| return S_FALSE; |
| IBenchPrintCallback &f = *printCallback; |
| |
| UInt64 dict64 = dict; |
| if (!dictIsDefined) |
| dict64 = (1 << 27); |
| if (use_fileData) |
| { |
| if (!dictIsDefined) |
| dict64 = fileDataBuffer.Size(); |
| else if (dict64 > fileDataBuffer.Size()) |
| dict64 = fileDataBuffer.Size(); |
| } |
| |
| for (;;) |
| { |
| const int index = method.FindProp(NCoderPropID::kDictionarySize); |
| if (index < 0) |
| break; |
| method.Props.Delete((unsigned)index); |
| } |
| |
| // methodName.RemoveChar(L'-'); |
| Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method |
| const UInt32 *checkSum = NULL; |
| int benchIndex = -1; |
| |
| if (isHashMethod) |
| { |
| for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++) |
| { |
| const CBenchHash &h = g_Hash[i]; |
| AString benchMethod (h.Name); |
| AString benchProps; |
| const int propPos = benchMethod.Find(':'); |
| if (propPos >= 0) |
| { |
| benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); |
| benchMethod.DeleteFrom((unsigned)propPos); |
| } |
| |
| if (AreSameMethodNames(benchMethod, methodName)) |
| { |
| const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); |
| /* |
| bool isMainMethod = method.PropsString.IsEmpty(); |
| if (isMainMethod) |
| isMainMethod = !checkSum |
| || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8")); |
| if (sameProps || isMainMethod) |
| */ |
| { |
| complexity = (Int32)h.Complex; |
| checkSum = &h.CheckSum; |
| if (sameProps) |
| break; |
| /* |
| if the property is not specified, we use the complexity |
| for the latest fastest method (crc32:64) |
| */ |
| } |
| } |
| } |
| // if (!checkSum) return E_NOTIMPL; |
| } |
| else |
| { |
| for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) |
| { |
| const CBenchMethod &bench = g_Bench[i]; |
| AString benchMethod (bench.Name); |
| AString benchProps; |
| const int propPos = benchMethod.Find(':'); |
| if (propPos >= 0) |
| { |
| benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); |
| benchMethod.DeleteFrom((unsigned)propPos); |
| } |
| |
| if (AreSameMethodNames(benchMethod, methodName)) |
| { |
| const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); |
| // bool isMainMethod = method.PropsString.IsEmpty(); |
| // if (sameProps || isMainMethod) |
| { |
| benchIndex = (int)i; |
| if (sameProps) |
| break; |
| } |
| } |
| } |
| // if (benchIndex < 0) return E_NOTIMPL; |
| } |
| |
| { |
| /* we count usage only for crc and filter. non-filters are not supported */ |
| UInt64 usage = (1 << 20); |
| UInt64 bufSize = dict64; |
| UInt32 numBlocks = isHashMethod ? 1 : 3; |
| if (use_fileData) |
| { |
| usage += fileDataBuffer.Size(); |
| if (bufSize > fileDataBuffer.Size()) |
| bufSize = fileDataBuffer.Size(); |
| if (isHashMethod) |
| { |
| numBlocks = 0; |
| #ifndef Z7_ST |
| if (numThreadsSpecified != 1) |
| numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0); |
| #endif |
| } |
| } |
| usage += numThreadsSpecified * bufSize * numBlocks; |
| Print_Usage_and_Threads(f, usage, numThreadsSpecified); |
| } |
| |
| CUIntVector numThreadsVector; |
| { |
| unsigned nt = numThreads_Start; |
| for (;;) |
| { |
| if (nt > numThreadsSpecified) |
| break; |
| numThreadsVector.Add(nt); |
| const unsigned next = nt * 2; |
| const UInt32 ntHalf= numThreadsSpecified / 2; |
| if (ntHalf > nt && ntHalf < next) |
| numThreadsVector.Add(ntHalf); |
| if (numThreadsSpecified > nt && numThreadsSpecified < next) |
| numThreadsVector.Add(numThreadsSpecified); |
| nt = next; |
| } |
| } |
| |
| unsigned numColumns = isHashMethod ? 1 : 2; |
| CTempValues speedTotals; |
| CTempValues usageTotals; |
| { |
| const unsigned numItems = numThreadsVector.Size() * numColumns; |
| speedTotals.Alloc(numItems); |
| usageTotals.Alloc(numItems); |
| for (unsigned i = 0; i < numItems; i++) |
| { |
| speedTotals.Values[i] = 0; |
| usageTotals.Values[i] = 0; |
| } |
| } |
| |
| f.NewLine(); |
| for (unsigned line = 0; line < 3; line++) |
| { |
| f.NewLine(); |
| f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size"); |
| FOR_VECTOR (ti, numThreadsVector) |
| { |
| if (ti != 0) |
| Print_Delimiter(f); |
| if (line == 0) |
| { |
| PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1)); |
| PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed); |
| } |
| else |
| { |
| for (unsigned c = 0; c < numColumns; c++) |
| { |
| PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1); |
| PrintRight(f, line == 1 ? "BW" : "MB/s", kFieldSize_CrcSpeed + 1); |
| } |
| } |
| } |
| } |
| f.NewLine(); |
| |
| UInt64 numSteps = 0; |
| |
| // for (UInt32 iter = 0; iter < numIterations; iter++) |
| // { |
| unsigned pow = 10; // kNumHashDictBits |
| if (startDicLog_Defined) |
| pow = startDicLog; |
| |
| // #define NUM_SUB_BITS 2 |
| // pow <<= NUM_SUB_BITS; |
| for (;; pow++) |
| { |
| const UInt64 bufSize = (UInt64)1 << pow; |
| // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS); |
| // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS); |
| |
| size_t dataSize = fileDataBuffer.Size(); |
| if (dataSize > bufSize || !use_fileData) |
| dataSize = (size_t)bufSize; |
| |
| for (UInt32 iter = 0; iter < numIterations; iter++) |
| { |
| Print_Pow(f, pow); |
| // PrintNumber(f, bufSize >> 10, 4); |
| |
| FOR_VECTOR (ti, numThreadsVector) |
| { |
| RINOK(f.CheckBreak()) |
| const UInt32 numThreads = numThreadsVector[ti]; |
| if (isHashMethod) |
| { |
| UInt64 speed = 0; |
| UInt64 usage = 0; |
| const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands, |
| numThreads, |
| dataSize, (const Byte *)fileDataBuffer, |
| speed, usage, |
| (UInt32)complexity, |
| 1, // benchWeight, |
| (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL, |
| method, |
| &f, |
| #ifndef Z7_ST |
| &affinityMode, |
| #endif |
| false, // showRating |
| NULL, false, 0); |
| RINOK(res) |
| |
| if (ti != 0) |
| Print_Delimiter(f); |
| |
| Bench_BW_Print_Usage_Speed(f, usage, speed); |
| speedTotals.Values[ti] += speed; |
| usageTotals.Values[ti] += usage; |
| } |
| else |
| { |
| { |
| unsigned keySize = 32; |
| if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16; |
| else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24; |
| callback.BenchProps.KeySize = keySize; |
| } |
| |
| COneMethodInfo method2 = method; |
| unsigned bench_DictBits; |
| |
| if (benchIndex >= 0) |
| { |
| const CBenchMethod &bench = g_Bench[benchIndex]; |
| callback.BenchProps.EncComplex = bench.EncComplex; |
| callback.BenchProps.DecComplexUnc = bench.DecComplexUnc; |
| callback.BenchProps.DecComplexCompr = bench.DecComplexCompr; |
| bench_DictBits = bench.DictBits; |
| // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug |
| } |
| else |
| { |
| bench_DictBits = kOldLzmaDictBits; // = 32 default |
| if (isFilter) |
| { |
| const unsigned k_UnknownCoderComplexity = 4; |
| callback.BenchProps.EncComplex = k_UnknownCoderComplexity; |
| callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity; |
| } |
| else |
| { |
| callback.BenchProps.EncComplex = 1 << 10; |
| callback.BenchProps.DecComplexUnc = 1 << 6; |
| } |
| callback.BenchProps.DecComplexCompr = 0; |
| } |
| callback.NeedPrint = false; |
| |
| if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) |
| { |
| const NCOM::CPropVariant propVariant = (UInt32)pow; |
| RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) |
| } |
| |
| const HRESULT res = MethodBench( |
| EXTERNAL_CODECS_LOC_VARS |
| complexInCommands, |
| #ifndef Z7_ST |
| false, // oldLzmaBenchMode |
| numThreadsVector[ti], |
| &affinityMode, |
| #endif |
| method2, |
| dataSize, (const Byte *)fileDataBuffer, |
| bench_DictBits, |
| printCallback, |
| &callback, |
| &callback.BenchProps); |
| RINOK(res) |
| |
| if (ti != 0) |
| Print_Delimiter(f); |
| |
| for (unsigned i = 0; i < 2; i++) |
| { |
| const CBenchInfo &bi = callback.BenchInfo_Results[i]; |
| const UInt64 usage = bi.GetUsage(); |
| const UInt64 speed = bi.GetUnpackSizeSpeed(); |
| usageTotals.Values[ti * 2 + i] += usage; |
| speedTotals.Values[ti * 2 + i] += speed; |
| Bench_BW_Print_Usage_Speed(f, usage, speed); |
| } |
| } |
| } |
| |
| f.NewLine(); |
| numSteps++; |
| } |
| if (dataSize >= dict64) |
| break; |
| } |
| |
| if (numSteps != 0) |
| { |
| f.Print("Avg:"); |
| for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++) |
| { |
| if (ti != 0) |
| Print_Delimiter(f); |
| for (unsigned i = 0; i < numColumns; i++) |
| Bench_BW_Print_Usage_Speed(f, |
| usageTotals.Values[ti * numColumns + i] / numSteps, |
| speedTotals.Values[ti * numColumns + i] / numSteps); |
| } |
| f.NewLine(); |
| } |
| |
| return S_OK; |
| } |
| |
| bool use2Columns = false; |
| |
| bool totalBenchMode = false; |
| bool onlyHashBench = false; |
| if (methodName.IsEqualTo_Ascii_NoCase("hash")) |
| { |
| onlyHashBench = true; |
| methodName = "*"; |
| totalBenchMode = true; |
| } |
| else if (methodName.Find('*') >= 0) |
| totalBenchMode = true; |
| |
| // ---------- Threads loop ---------- |
| for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++) |
| { |
| |
| UInt32 numThreads = numThreadsSpecified; |
| |
| if (!multiThreadTests) |
| { |
| if (threadsPassIndex != 0) |
| break; |
| } |
| else |
| { |
| numThreads = 1; |
| if (threadsPassIndex != 0) |
| { |
| if (numCPUs < 2) |
| break; |
| numThreads = numCPUs; |
| if (threadsPassIndex == 1) |
| { |
| if (numCPUs >= 4) |
| numThreads = numCPUs / 2; |
| } |
| else if (numCPUs < 4) |
| break; |
| } |
| } |
| |
| IBenchPrintCallback &f = *printCallback; |
| |
| if (threadsPassIndex > 0) |
| { |
| f.NewLine(); |
| f.NewLine(); |
| } |
| |
| if (!dictIsDefined && !onlyHashBench) |
| { |
| const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25); |
| unsigned dicSizeLog = dicSizeLog_Main; |
| |
| #ifdef UNDER_CE |
| dicSizeLog = (UInt64)1 << 20; |
| #endif |
| |
| if (ramSize_Defined) |
| for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--) |
| if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize) |
| break; |
| |
| dict = (UInt64)1 << dicSizeLog; |
| |
| if (totalBenchMode && dicSizeLog != dicSizeLog_Main) |
| { |
| f.Print("Dictionary reduced to: "); |
| PrintNumber(f, dicSizeLog, 1); |
| f.NewLine(); |
| } |
| } |
| |
| Print_Usage_and_Threads(f, |
| onlyHashBench ? |
| GetBenchMemoryUsage_Hash(numThreads, dict) : |
| GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode), |
| numThreads); |
| |
| f.NewLine(); |
| |
| f.NewLine(); |
| |
| if (totalBenchMode) |
| { |
| callback.NameFieldSize = kFieldSize_Name; |
| use2Columns = false; |
| } |
| else |
| { |
| callback.NameFieldSize = kFieldSize_SmallName; |
| use2Columns = true; |
| } |
| callback.Use2Columns = use2Columns; |
| |
| bool showFreq = false; |
| UInt64 cpuFreq = 0; |
| |
| if (totalBenchMode) |
| { |
| showFreq = true; |
| } |
| |
| unsigned fileldSize = kFieldSize_TotalSize; |
| if (showFreq) |
| fileldSize += kFieldSize_EUAndEffec; |
| |
| if (use2Columns) |
| { |
| PrintSpaces(f, callback.NameFieldSize); |
| PrintRight(f, "Compressing", fileldSize); |
| f.Print(kSep); |
| PrintRight(f, "Decompressing", fileldSize); |
| } |
| f.NewLine(); |
| PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize); |
| |
| int j; |
| |
| for (j = 0; j < 2; j++) |
| { |
| PrintRight(f, "Speed", kFieldSize_Speed + 1); |
| PrintRight(f, "Usage", kFieldSize_Usage + 1); |
| PrintRight(f, "R/U", kFieldSize_RU + 1); |
| PrintRight(f, "Rating", kFieldSize_Rating + 1); |
| if (showFreq) |
| { |
| PrintRight(f, "E/U", kFieldSize_EU + 1); |
| PrintRight(f, "Effec", kFieldSize_Effec + 1); |
| } |
| if (!use2Columns) |
| break; |
| if (j == 0) |
| f.Print(kSep); |
| } |
| |
| f.NewLine(); |
| PrintSpaces(f, callback.NameFieldSize); |
| |
| for (j = 0; j < 2; j++) |
| { |
| PrintRight(f, "KiB/s", kFieldSize_Speed + 1); |
| PrintRight(f, "%", kFieldSize_Usage + 1); |
| PrintRight(f, "MIPS", kFieldSize_RU + 1); |
| PrintRight(f, "MIPS", kFieldSize_Rating + 1); |
| if (showFreq) |
| { |
| PrintRight(f, "%", kFieldSize_EU + 1); |
| PrintRight(f, "%", kFieldSize_Effec + 1); |
| } |
| if (!use2Columns) |
| break; |
| if (j == 0) |
| f.Print(kSep); |
| } |
| |
| f.NewLine(); |
| f.NewLine(); |
| |
| if (specifiedFreq != 0) |
| cpuFreq = specifiedFreq; |
| |
| // bool showTotalSpeed = false; |
| |
| if (totalBenchMode) |
| { |
| for (UInt32 i = 0; i < numIterations; i++) |
| { |
| if (i != 0) |
| printCallback->NewLine(); |
| |
| const unsigned kNumCpuTests = 3; |
| for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++) |
| { |
| PrintLeft(f, "CPU", kFieldSize_Name); |
| |
| // UInt32 resVal; |
| |
| CFreqBench fb; |
| fb.complexInCommands = complexInCommands; |
| fb.numThreads = numThreads; |
| // showFreq; |
| fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); |
| fb.specifiedFreq = specifiedFreq; |
| |
| const HRESULT res = fb.FreqBench(printCallback |
| #ifndef Z7_ST |
| , &affinityMode |
| #endif |
| ); |
| RINOK(res) |
| |
| cpuFreq = fb.CpuFreqRes; |
| callback.NewLine(); |
| |
| if (specifiedFreq != 0) |
| cpuFreq = specifiedFreq; |
| |
| if (testTimeMs >= 1000) |
| if (freqTest == kNumCpuTests - 1) |
| { |
| // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands); |
| } |
| } |
| callback.NewLine(); |
| |
| // return S_OK; // change it |
| |
| callback.SetFreq(true, cpuFreq); |
| |
| if (!onlyHashBench) |
| { |
| size_t dataSize = (size_t)dict; |
| if (use_fileData) |
| { |
| dataSize = fileDataBuffer.Size(); |
| if (dictIsDefined && dataSize > dict) |
| dataSize = (size_t)dict; |
| } |
| |
| const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS |
| method, complexInCommands, |
| #ifndef Z7_ST |
| numThreads, |
| &affinityMode, |
| #endif |
| dictIsDefined || use_fileData, // forceUnpackSize |
| dataSize, |
| (const Byte *)fileDataBuffer, |
| printCallback, &callback); |
| RINOK(res) |
| } |
| |
| { |
| size_t dataSize = (size_t)1 << kNumHashDictBits; |
| if (dictIsDefined) |
| { |
| dataSize = (size_t)dict; |
| if (dataSize != dict) |
| return E_OUTOFMEMORY; |
| } |
| if (use_fileData) |
| { |
| dataSize = fileDataBuffer.Size(); |
| if (dictIsDefined && dataSize > dict) |
| dataSize = (size_t)dict; |
| } |
| |
| const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS |
| method, complexInCommands, |
| numThreads, |
| dataSize, (const Byte *)fileDataBuffer, |
| printCallback, &callback, |
| #ifndef Z7_ST |
| &affinityMode, |
| #endif |
| &callback.EncodeRes, true, cpuFreq); |
| RINOK(res) |
| } |
| |
| callback.NewLine(); |
| { |
| PrintLeft(f, "CPU", kFieldSize_Name); |
| |
| CFreqBench fb; |
| fb.complexInCommands = complexInCommands; |
| fb.numThreads = numThreads; |
| // showFreq; |
| fb.showFreq = (specifiedFreq != 0); |
| fb.specifiedFreq = specifiedFreq; |
| |
| const HRESULT res = fb.FreqBench(printCallback |
| #ifndef Z7_ST |
| , &affinityMode |
| #endif |
| ); |
| RINOK(res) |
| callback.NewLine(); |
| } |
| } |
| } |
| else |
| { |
| needSetComplexity = true; |
| if (!methodName.IsEqualTo_Ascii_NoCase("LZMA")) |
| { |
| unsigned i; |
| for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) |
| { |
| const CBenchMethod &h = g_Bench[i]; |
| AString benchMethod (h.Name); |
| AString benchProps; |
| const int propPos = benchMethod.Find(':'); |
| if (propPos >= 0) |
| { |
| benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); |
| benchMethod.DeleteFrom((unsigned)propPos); |
| } |
| |
| if (AreSameMethodNames(benchMethod, methodName)) |
| { |
| if (benchProps.IsEmpty() |
| || (benchProps == "x5" && method.PropsString.IsEmpty()) |
| || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps)) |
| { |
| callback.BenchProps.EncComplex = h.EncComplex; |
| callback.BenchProps.DecComplexCompr = h.DecComplexCompr; |
| callback.BenchProps.DecComplexUnc = h.DecComplexUnc; |
| needSetComplexity = false; |
| break; |
| } |
| } |
| } |
| /* |
| if (i == Z7_ARRAY_SIZE(g_Bench)) |
| return E_NOTIMPL; |
| */ |
| } |
| if (needSetComplexity) |
| callback.BenchProps.SetLzmaCompexity(); |
| |
| if (startDicLog < kBenchMinDicLogSize) |
| startDicLog = kBenchMinDicLogSize; |
| |
| for (unsigned i = 0; i < numIterations; i++) |
| { |
| unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog; |
| if (!multiDict) |
| pow = 32; |
| while (GetDictSizeFromLog(pow) > dict && pow > 0) |
| pow--; |
| for (; GetDictSizeFromLog(pow) <= dict; pow++) |
| { |
| Print_Pow(f, pow); |
| callback.DictSize = (UInt64)1 << pow; |
| |
| COneMethodInfo method2 = method; |
| |
| if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) |
| { |
| // We add dictionary size property. |
| // method2 can have two different dictionary size properties. |
| // And last property is main. |
| NCOM::CPropVariant propVariant = (UInt32)pow; |
| RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) |
| } |
| |
| size_t uncompressedDataSize; |
| if (use_fileData) |
| { |
| uncompressedDataSize = fileDataBuffer.Size(); |
| } |
| else |
| { |
| uncompressedDataSize = (size_t)callback.DictSize; |
| if (uncompressedDataSize != callback.DictSize) |
| return E_OUTOFMEMORY; |
| if (uncompressedDataSize >= (1 << 18)) |
| uncompressedDataSize += kAdditionalSize; |
| } |
| |
| const HRESULT res = MethodBench( |
| EXTERNAL_CODECS_LOC_VARS |
| complexInCommands, |
| #ifndef Z7_ST |
| true, numThreads, |
| &affinityMode, |
| #endif |
| method2, |
| uncompressedDataSize, (const Byte *)fileDataBuffer, |
| kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps); |
| f.NewLine(); |
| RINOK(res) |
| if (!multiDict) |
| break; |
| } |
| } |
| } |
| |
| PrintChars(f, '-', callback.NameFieldSize + fileldSize); |
| |
| if (use2Columns) |
| { |
| f.Print(kSep); |
| PrintChars(f, '-', fileldSize); |
| } |
| |
| f.NewLine(); |
| |
| if (use2Columns) |
| { |
| PrintLeft(f, "Avr:", callback.NameFieldSize); |
| PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes); |
| f.Print(kSep); |
| PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes); |
| f.NewLine(); |
| } |
| |
| PrintLeft(f, "Tot:", callback.NameFieldSize); |
| CTotalBenchRes midRes; |
| midRes = callback.EncodeRes; |
| midRes.Update_With_Res(callback.DecodeRes); |
| |
| // midRes.SetSum(callback.EncodeRes, callback.DecodeRes); |
| PrintTotals(f, showFreq, cpuFreq, false, midRes); |
| f.NewLine(); |
| |
| } |
| return S_OK; |
| } |