blob: 5da9783d4e19755ff3a03d9fd67db9d9c649e957 [file] [log] [blame]
// Bench.cpp
#include "StdAfx.h"
#include "../../../../C/CpuArch.h"
// #include <stdio.h>
#ifndef _WIN32
#define USE_POSIX_TIME
#define USE_POSIX_TIME2
#endif // _WIN32
#ifdef USE_POSIX_TIME
#include <time.h>
#include <unistd.h>
#ifdef USE_POSIX_TIME2
#include <sys/time.h>
#include <sys/times.h>
#endif
#endif // USE_POSIX_TIME
#ifdef _WIN32
#define USE_ALLOCA
#endif
#ifdef USE_ALLOCA
#ifdef _WIN32
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#endif
#include "../../../../C/7zCrc.h"
#include "../../../../C/RotateDefs.h"
#ifndef Z7_ST
#include "../../../Windows/Synchronization.h"
#include "../../../Windows/Thread.h"
#endif
#include "../../../Windows/FileFind.h"
#include "../../../Windows/FileIO.h"
#include "../../../Windows/SystemInfo.h"
#include "../../../Common/MyBuffer2.h"
#include "../../../Common/IntToString.h"
#include "../../../Common/StringConvert.h"
#include "../../../Common/StringToInt.h"
#include "../../../Common/Wildcard.h"
#include "../../Common/MethodProps.h"
#include "../../Common/StreamObjects.h"
#include "../../Common/StreamUtils.h"
#include "Bench.h"
using namespace NWindows;
#ifndef Z7_ST
// NOTE(review): presumably the 7-Zip method ID of the LZMA coder; it is not
// used in the visible part of the file - confirm against the later code.
static const UInt32 k_LZMA = 0x030101;
#endif
// Default "complexity" budget, in abstract CPU commands.
// SetComplexCommandsMs() falls back to this value when it does not derive
// a budget from the CPU frequency.
// The value is 2^31 for UNDER_CE builds and 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
#ifdef UNDER_CE
31;
#else
34;
#endif
// NOTE(review): looks like the default benchmark pass duration in
// milliseconds (compare the complexInMs parameter of SetComplexCommandsMs);
// its use is outside the visible part of the file.
static const UInt32 kComplexInMs = 4000;
// Computes the benchmark complexity budget (in commands) for one pass.
//   complexInMs       - requested pass duration in milliseconds, 0 = not set
//   isSpecifiedFreq   - true, if cpuFreq was given explicitly by the caller
//   cpuFreq           - CPU frequency in Hz
//   complexInCommands - [out] resulting budget
// A non-specified frequency is raised to 4 MHz if it is lower than that.
// A non-specified frequency of 20 GHz or more is considered unreliable,
// and the default kComplexInCommands is returned in that case.
static void SetComplexCommandsMs(UInt32 complexInMs,
    bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
{
  const UInt64 kFreqLow = (UInt64)1000000 * 4;
  const UInt64 kFreqHigh = (UInt64)1000000 * 20000;

  if (!isSpecifiedFreq && cpuFreq < kFreqLow)
    cpuFreq = kFreqLow;

  if (!isSpecifiedFreq && cpuFreq >= kFreqHigh)
  {
    complexInCommands = kComplexInCommands;
    return;
  }

  if (complexInMs == 0)
    complexInCommands = cpuFreq >> 2; // a quarter of a second of work
  else
    complexInCommands = complexInMs * cpuFreq / 1000;
}
// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
// CPU usage is stored as a fixed-point value: kBenchmarkUsageMult == 100% of one core.
static const unsigned kBenchmarkUsageMultBits = 16;
static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;

// Converts a fixed-point usage value (kBenchmarkUsageMult units == 100%)
// to integer percents, rounded to nearest.
UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
{
  const UInt64 half = kBenchmarkUsageMult / 2;
  const UInt64 scaled = 100 * usage;
  return (scaled + half) / kBenchmarkUsageMult;
}
// Size / bit-count constants of the benchmark.
// NOTE(review): kNumHashDictBits, kFilterUnpackSize, kOldLzmaDictBits and
// kAdditionalSize are not referenced in the visible part of the file;
// their exact meaning must be confirmed against the code that uses them.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test
static const unsigned kOldLzmaDictBits = 32;
// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
static const size_t kAdditionalSize = (size_t)1 << 16;
// Fixed number of extra bytes that GetBenchCompressedSize() adds
// to the output buffer for compressed data.
static const UInt32 kCompressedAdditionalSize = (1 << 10);
// Capacity of the buffer (CEncoderInfo::propsData) that receives
// the coder properties written by the encoder.
static const UInt32 kMaxMethodPropSize = (1 << 6);
// Allocates (_size_) bytes in the buffer object pointed to by (_buffer_) and
// returns E_OUTOFMEMORY from the ENCLOSING function if a non-zero size
// was requested and the buffer is not allocated after the call.
// The macro expands to a braced block, and it is used WITHOUT a trailing semicolon.
// Note: _size_ is expanded twice and is not parenthesized in the condition,
// so pass only simple side-effect-free expressions.
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
{ (_buffer_)->Alloc(_size_); \
if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
// Small deterministic pseudo-random generator built from two 16-bit
// multiply-with-carry sequences (A1, A2). Each output is XORed with Salt.
// The generator always restarts from the same fixed state, so the produced
// sequence depends on Salt only.
class CBaseRandomGenerator
{
  UInt32 A1;
  UInt32 A2;
  UInt32 Salt;
public:
  CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }

  // Resets both sequences to their fixed start values.
  void Init()
  {
    A1 = 362436069;
    A2 = 521288629;
  }

  // Advances both sequences and returns the next 32-bit value.
  Z7_FORCE_INLINE
  UInt32 GetRnd()
  {
    A1 = 36969 * (A1 & 0xffff) + (A1 >> 16);
    A2 = 18000 * (A2 & 0xffff) + (A2 >> 16);
    return Salt ^ ((A1 << 16) + A2);
  }
};
// Fills buf[0 .. size) with pseudo-random bytes from a default
// (salt == 0) CBaseRandomGenerator.
// Full 4-byte groups are stored with SetUi32(); the remaining 0..3 bytes
// are taken from the low bytes of one more generated value.
Z7_NO_INLINE
static void RandGen(Byte *buf, size_t size)
{
  CBaseRandomGenerator gen;
  const size_t numAligned = size & ~((size_t)3);
  size_t pos = 0;

  while (pos < numAligned)
  {
    const UInt32 word = gen.GetRnd();
    SetUi32(buf + pos, word)
    pos += 4;
  }

  UInt32 tail = gen.GetRnd();
  while (pos < size)
  {
    buf[pos++] = (Byte)tail;
    tail >>= 8;
  }
}
// Buffer (CMidAlignedBuffer) that can fill itself with benchmark input data:
// either plain pseudo-random bytes or LZ-like data that contains
// repetitions of earlier parts of the buffer.
// The buffer must be allocated by the caller before Generate*() is called:
// both functions fill exactly Size() bytes.
class CBenchRandomGenerator: public CMidAlignedBuffer
{
// Returns the (numBits) low bits of (res) and removes them from (res).
static UInt32 GetVal(UInt32 &res, unsigned numBits)
{
UInt32 val = res & (((UInt32)1 << numBits) - 1);
res >>= numBits;
return val;
}
// Reads a 2-bit value (len) from (r), then reads and returns
// a value of (1 + len) bits. So the result is in range [0, 15].
static UInt32 GetLen(UInt32 &r)
{
UInt32 len = GetVal(r, 2);
return GetVal(r, 1 + len);
}
public:
// Fills the whole buffer with the low bytes of successive
// generator values. (salt) selects the sequence.
void GenerateSimpleRandom(UInt32 salt)
{
CBaseRandomGenerator rg(salt);
const size_t bufSize = Size();
Byte *buf = (Byte *)*this;
for (size_t i = 0; i < bufSize; i++)
buf[i] = (Byte)rg.GetRnd();
}
// Fills the whole buffer with a mix of literal bytes and
// copies ("matches") of previously generated data.
//   dictBits - limits the number of bits of a match distance
//   salt     - selects the pseudo-random sequence
void GenerateLz(unsigned dictBits, UInt32 salt)
{
CBaseRandomGenerator rg(salt);
size_t pos = 0;
// rep0 is the current match distance. It is reused by matches
// that don't select a new distance.
size_t rep0 = 1;
const size_t bufSize = Size();
Byte *buf = (Byte *)*this;
// posBits is kept as the smallest value, so that ((size_t)1 << posBits) >= pos
unsigned posBits = 1;
// printf("\n dictBits = %d\n", (UInt32)dictBits);
// printf("\n bufSize = 0x%p\n", (const void *)bufSize);
while (pos < bufSize)
{
/*
if (pos >= ((UInt32)1 << 31))
printf(" %x\n", pos);
*/
UInt32 r = rg.GetRnd();
// one bit of (r) selects literal or match.
// The first 1024 bytes are always literals.
if (GetVal(r, 1) == 0 || pos < 1024)
buf[pos++] = (Byte)(r & 0xFF);
else
{
UInt32 len;
len = 1 + GetLen(r);
// 3-bit value is zero: short match with previous distance (rep0).
// otherwise: longer match with new random distance.
if (GetVal(r, 3) != 0)
{
len += GetLen(r);
while (((size_t)1 << posBits) < pos)
posBits++;
unsigned numBitsMax = dictBits;
if (numBitsMax > posBits)
numBitsMax = posBits;
const unsigned kAddBits = 6;
unsigned numLogBits = 5;
if (numBitsMax <= (1 << 4) - 1 + kAddBits)
numLogBits = 4;
// we select the number of distance bits (ppp) first, and then
// the distance itself. We repeat that until the distance
// is inside already generated data.
for (;;)
{
const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
r = rg.GetRnd();
if (ppp > numBitsMax)
continue;
// rep0 = GetVal(r, ppp);
rep0 = r & (((size_t)1 << ppp) - 1);
if (rep0 < pos)
break;
r = rg.GetRnd();
}
// distance is (rep0 + 1) after that increment, so it is never zero
rep0++;
}
// len *= 300; // for debug
{
// we don't write after the end of buffer
const size_t rem = bufSize - pos;
if (len > rem)
len = (UInt32)rem;
}
Byte *dest = buf + pos;
const Byte *src = dest - rep0;
pos += len;
// byte-by-byte copy: source and destination can overlap, if (rep0 < len)
for (UInt32 i = 0; i < len; i++)
*dest++ = *src++;
}
}
// printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
}
};
// Sequential input stream that reads from a memory block.
// The stream does not own the data: the caller must keep
// the block alive while the stream is used.
Z7_CLASS_IMP_NOQIB_1(
  CBenchmarkInStream
  , ISequentialInStream
)
  const Byte *Data;
  size_t Pos;
  size_t Size;
public:
  // Attaches the stream to data[0 .. size) and rewinds it.
  void Init(const Byte *data, size_t size)
  {
    Pos = 0;
    Size = size;
    Data = data;
  }

  // Returns true, if all bytes of the block were read.
  bool WasFinished() const { return Size == Pos; }
};
// Copies up to (size) bytes from the current position to (data).
// One call returns no more than 1 MiB and no more than the remaining
// part of the block. The number of copied bytes is stored to
// (*processedSize), if (processedSize) is not NULL. Always returns S_OK.
Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
{
  const UInt32 kChunkLimit = (1 << 20);
  const size_t avail = Size - Pos;

  UInt32 cur = (size > kChunkLimit) ? kChunkLimit : size;
  if (cur > avail)
    cur = (UInt32)avail;

  if (cur != 0)
  {
    memcpy(data, Data + Pos, cur);
    Pos += cur;
  }

  if (processedSize)
    *processedSize = cur;
  return S_OK;
}
// Sequential output stream backed by its own CMidAlignedBuffer.
// Write() can store the data to the buffer (RealCopy) and / or
// update a running CRC of the written data (CalcCrc).
class CBenchmarkOutStream Z7_final:
  public ISequentialOutStream,
  public CMyUnknownImp,
  public CMidAlignedBuffer
{
  Z7_COM_UNKNOWN_IMP_0
  Z7_IFACE_COM7_IMP(ISequentialOutStream)
  // bool _overflow;
public:
  size_t Pos;      // number of bytes written since Init()
  bool RealCopy;   // store written data to the buffer
  bool CalcCrc;    // update Crc with written data
  UInt32 Crc;      // running CRC state (not finalized)

  // CBenchmarkOutStream(): _overflow(false) {}

  // Restarts the CRC calculation.
  void InitCrc() { Crc = CRC_INIT_VAL; }

  // Rewinds the stream and selects the work mode for next writes.
  void Init(bool realCopy, bool calcCrc)
  {
    InitCrc();
    CalcCrc = calcCrc;
    RealCopy = realCopy;
    // _overflow = false;
    Pos = 0;
  }

  // Updates the running CRC with data[0 .. size).
  void Calc(const void *data, size_t size) { Crc = CrcUpdate(Crc, data, size); }

  size_t GetPos() const { return Pos; }

  // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
};
// Accepts as many bytes as fit into the remaining part of the buffer.
// The accepted part is copied (if RealCopy) and added to CRC (if CalcCrc).
// The accepted size is stored to (*processedSize), if it is not NULL.
// Returns E_FAIL, if the buffer is too small for the whole request.
Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
{
  const size_t room = Size() - Pos;
  const size_t num = (room > size) ? (size_t)size : room;

  if (num != 0)
  {
    if (RealCopy)
      memcpy(((Byte *)*this) + Pos, data, num);
    if (CalcCrc)
      Calc(data, num);
    Pos += num;
  }

  if (processedSize)
    *processedSize = (UInt32)num;

  if (num == size)
    return S_OK;
  // _overflow = true;
  return E_FAIL;
}
// Output stream that stores nothing: it only counts the written bytes
// and (if CalcCrc is set) updates a running CRC of the written data.
Z7_CLASS_IMP_NOQIB_1(
  CCrcOutStream
  , ISequentialOutStream
)
public:
  bool CalcCrc;   // true by default
  UInt32 Crc;     // running CRC state (not finalized)
  UInt64 Pos;     // total number of bytes written since Init()

  CCrcOutStream(): CalcCrc(true) {}

  // Resets the byte counter and the CRC state. CalcCrc is not changed.
  void Init()
  {
    Pos = 0;
    Crc = CRC_INIT_VAL;
  }

  // Updates the running CRC with data[0 .. size).
  void Calc(const void *data, size_t size) { Crc = CrcUpdate(Crc, data, size); }
};
// Consumes the whole block: updates CRC (if CalcCrc) and the byte counter.
// Stores (size) to (*processedSize), if it is not NULL. Always returns S_OK.
Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
{
  if (CalcCrc)
    Calc(data, size);
  if (processedSize)
    *processedSize = size;
  Pos += size;
  return S_OK;
}
// #include "../../../../C/My_sys_time.h"
// Returns the current wall-clock counter in units of (1 / GetFreq()) second:
//   POSIX (USE_POSIX_TIME2) : microseconds from gettimeofday(),
//                             or time() * 1000000 if gettimeofday() failed
//   POSIX (other)           : seconds from time()
//   Windows                 : QueryPerformanceCounter() value,
//                             or GetTickCount() if the counter is not available
static UInt64 GetTimeCount()
{
  #ifdef USE_POSIX_TIME
    #ifdef USE_POSIX_TIME2
      timeval tv;
      if (gettimeofday(&tv, NULL) != 0)
        return (UInt64)time(NULL) * 1000000;
      return (UInt64)(tv.tv_sec) * 1000000 + (UInt64)tv.tv_usec;
    #else
      return time(NULL);
    #endif
  #else
    LARGE_INTEGER counter;
    if (!::QueryPerformanceCounter(&counter))
      return GetTickCount();
    return (UInt64)counter.QuadPart;
  #endif
}
// Returns the number of GetTimeCount() units per second:
//   POSIX (USE_POSIX_TIME2) : 1000000
//   POSIX (other)           : 1
//   Windows                 : QueryPerformanceFrequency() value,
//                             or 1000 if the call failed
static UInt64 GetFreq()
{
  #ifdef USE_POSIX_TIME
    #ifdef USE_POSIX_TIME2
      return 1000000;
    #else
      return 1;
    #endif
  #else
    LARGE_INTEGER freq;
    if (!::QueryPerformanceFrequency(&freq))
      return 1000;
    return (UInt64)freq.QuadPart;
  #endif
}
#ifdef USE_POSIX_TIME
struct CUserTime
{
UInt64 Sum;
clock_t Prev;
void Init()
{
// Prev = clock();
Sum = 0;
Prev = 0;
Update();
Sum = 0;
}
void Update()
{
tms t;
/* clock_t res = */ times(&t);
clock_t newVal = t.tms_utime + t.tms_stime;
Sum += (UInt64)(newVal - Prev);
Prev = newVal;
/*
clock_t v = clock();
if (v != -1)
{
Sum += v - Prev;
Prev = v;
}
*/
}
UInt64 GetUserTime()
{
Update();
return Sum;
}
};
#else
struct CUserTime
{
bool UseTick;
DWORD Prev_Tick;
UInt64 Prev;
UInt64 Sum;
void Init()
{
UseTick = false;
Prev_Tick = 0;
Prev = 0;
Sum = 0;
Update();
Sum = 0;
}
UInt64 GetUserTime()
{
Update();
return Sum;
}
void Update();
};
static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
void CUserTime::Update()
{
DWORD new_Tick = GetTickCount();
FILETIME creationTime, exitTime, kernelTime, userTime;
if (!UseTick &&
#ifdef UNDER_CE
::GetThreadTimes(::GetCurrentThread()
#else
::GetProcessTimes(::GetCurrentProcess()
#endif
, &creationTime, &exitTime, &kernelTime, &userTime))
{
UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
Sum += newVal - Prev;
Prev = newVal;
}
else
{
UseTick = true;
Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
}
Prev_Tick = new_Tick;
}
#endif
// Returns the number of CUserTime units per second:
// sysconf(_SC_CLK_TCK) for POSIX, and 10000000 (100-ns units) otherwise.
static UInt64 GetUserFreq()
{
  #ifdef USE_POSIX_TIME
    // return CLOCKS_PER_SEC;
    const long ticksPerSec = sysconf(_SC_CLK_TCK);
    return (UInt64)ticksPerSec;
  #else
    return 10000000;
  #endif
}
// Result code shared by the progress callbacks of one benchmark run.
// In multithreading builds (Z7_ST is not defined) the access to Res
// via SetResult() / GetResult() is protected with a critical section.
// Res and EncodeMode are not initialized here: the owner must set them.
class CBenchProgressStatus Z7_final
{
  #ifndef Z7_ST
  NSynchronization::CCriticalSection CS;
  #endif
public:
  HRESULT Res;
  bool EncodeMode; // selects encode or decode reporting in CBenchProgressInfo::SetRatioInfo()

  HRESULT GetResult()
  {
    #ifndef Z7_ST
    NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    const HRESULT cur = Res;
    return cur;
  }

  void SetResult(HRESULT res)
  {
    #ifndef Z7_ST
    NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    Res = res;
  }
};
// Timer part of a benchmark: it keeps the start values in BenchInfo
// and produces elapsed wall-clock and CPU time on request.
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;
  CUserTime UserTime;

  void SetStartTime();
  void SetFinishTime(CBenchInfo &dest);
};

// Stores the timer frequencies and the current wall-clock counter
// to BenchInfo, and restarts the CPU time accumulator.
void CBenchInfoCalc::SetStartTime()
{
  BenchInfo.UserFreq = GetUserFreq();
  BenchInfo.GlobalFreq = GetFreq();
  BenchInfo.UserTime = 0;
  BenchInfo.GlobalTime = ::GetTimeCount();
  UserTime.Init();
}

// Copies BenchInfo to (dest) and replaces the time fields with the
// elapsed values: wall-clock time since SetStartTime() and accumulated CPU time.
void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
{
  dest = BenchInfo;
  const UInt64 now = ::GetTimeCount();
  dest.GlobalTime = now - BenchInfo.GlobalTime;
  dest.UserTime = UserTime.GetUserTime();
}
// Progress callback object (ICompressProgressInfo) that is passed to coders.
// It also contains the timer (CBenchInfoCalc) of the benchmark pass.
class CBenchProgressInfo Z7_final:
public ICompressProgressInfo,
public CMyUnknownImp,
public CBenchInfoCalc
{
Z7_COM_UNKNOWN_IMP_0
Z7_IFACE_COM7_IMP(ICompressProgressInfo)
public:
// shared result code. It is NOT initialized by the constructor:
// the owner must set it before SetRatioInfo() can be called.
CBenchProgressStatus *Status;
// optional receiver of intermediate results. SetRatioInfo() reports
// nothing, if it is NULL.
IBenchCallback *Callback;
CBenchProgressInfo(): Callback(NULL) {}
};
// Progress callback that is called by a coder during Code().
//   inSize  - number of bytes consumed by the coder in current pass, can be NULL
//   outSize - number of bytes produced by the coder in current pass, can be NULL
// Returns the shared status code, if it is already an error (that stops
// the coder). Otherwise, if Callback is set, it reports intermediate result
// (elapsed time and sizes processed so far) via SetEncodeResult() or
// SetDecodeResult(), depending on Status->EncodeMode.
// An error returned by the callback is stored to the shared status.
Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
{
  HRESULT res = Status->GetResult();
  if (res != S_OK)
    return res;
  if (!Callback)
    return res;

  /*
  static UInt64 inSizePrev = 0;
  static UInt64 outSizePrev = 0;
  UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
  if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; }
  if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; }
  UInt64 percents = delta2 * 1000;
  if (delta1 != 0)
  percents /= delta1;
  printf("=== %7d %7d %7d %7d ratio = %4d\n",
  (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
  (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
  (unsigned)percents);
  */

  // a coder is allowed to pass NULL, if it doesn't know the size.
  // We use 0 for unknown size instead of dereferencing NULL pointer.
  const UInt64 inProcessed = inSize ? *inSize : 0;
  const UInt64 outProcessed = outSize ? *outSize : 0;

  CBenchInfo info;
  SetFinishTime(info);

  if (Status->EncodeMode)
  {
    // encoder: input is unpacked data, output is packed data
    info.UnpackSize = BenchInfo.UnpackSize + inProcessed;
    info.PackSize = BenchInfo.PackSize + outProcessed;
    res = Callback->SetEncodeResult(info, false);
  }
  else
  {
    // decoder: input is packed data, output is unpacked data
    info.PackSize = BenchInfo.PackSize + inProcessed;
    info.UnpackSize = BenchInfo.UnpackSize + outProcessed;
    res = Callback->SetDecodeResult(info, false);
  }

  if (res != S_OK)
    Status->SetResult(res);
  return res;
}
// Number of fractional bits in the fixed-point logarithm of GetLogSize_Sub().
static const unsigned kSubBits = 8;

// Returns the number of significant bits of (size).
// The result is 1 for (size == 0) and for (size == 1).
static unsigned GetLogSize(UInt64 size)
{
  unsigned numBits = 0;
  do
  {
    numBits++;
    size >>= 1;
  }
  while (size != 0);
  return numBits;
}
// Returns an approximation of log2(size) as a fixed-point value with
// kSubBits fractional bits: the integer part is the index of the highest
// set bit, and the fraction is taken from the kSubBits bits that follow
// the highest bit (linear interpolation between powers of 2).
// Returns 0 for (size <= 1).
static UInt32 GetLogSize_Sub(UInt64 size)
{
  if (size <= 1)
    return 0;

  const unsigned topBit = GetLogSize(size) - 1;
  const UInt32 kFracMask = ((UInt32)1 << kSubBits) - 1;

  // the highest bit is moved to the position kSubBits
  const UInt32 normalized = (topBit <= kSubBits) ?
      (UInt32)(size) << (kSubBits - topBit) :
      (UInt32)(size >> (topBit - kSubBits));

  return ((UInt32)topBit << kSubBits) + (normalized & kFracMask);
}
// Converts (v) to UInt64 with saturation:
//   - values larger than 2^62 are replaced with 2^62
//   - negative values and NaN are replaced with 0
// The conversion of NaN or of a value <= -1 to unsigned integer type
// is undefined behavior in C++, so such values must not reach the cast.
// Callers build (v) from (Int64) casts of UInt64 values, so (v) can be
// negative, if some operand is 2^63 or larger.
static UInt64 Get_UInt64_from_double(double v)
{
  const UInt64 kMaxVal = (UInt64)1 << 62;
  if (!(v >= 0)) // also true for NaN
    return 0;
  if (v > (double)(Int64)kMaxVal)
    return kMaxVal;
  return (UInt64)v;
}
// Returns (m1 * m2 / d), calculated in floating point and converted
// back with Get_UInt64_from_double(). (d == 0) is treated as (d == 1).
// The operands are converted via Int64, so they are expected to be
// smaller than 2^63.
static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
{
  const UInt64 divisor = (d == 0) ? 1 : d;
  const double f1 = (double)(Int64)m1;
  const double f2 = (double)(Int64)m2;
  const double fd = (double)(Int64)divisor;
  return Get_UInt64_from_double(f1 * f2 / fd);
  /*
  unsigned n1 = GetLogSize(m1);
  unsigned n2 = GetLogSize(m2);
  while (n1 + n2 > 64)
  {
  if (n1 >= n2)
  {
  m1 >>= 1;
  n1--;
  }
  else
  {
  m2 >>= 1;
  n2--;
  }
  d >>= 1;
  }
  if (d == 0)
  d = 1;
  return m1 * m2 / d;
  */
}
// Returns CPU usage as a fixed-point value:
//   (UserTime / UserFreq) / (GlobalTime / GlobalFreq) * kBenchmarkUsageMult
// so kBenchmarkUsageMult means that CPU time is equal to wall-clock time.
// Zero UserFreq and zero GlobalTime are replaced with 1.
UInt64 CBenchInfo::GetUsage() const
{
  const UInt64 userFreq = (UserFreq == 0) ? 1 : UserFreq;
  const UInt64 globalTime = (GlobalTime == 0) ? 1 : GlobalTime;

  const double cpuSeconds = (double)(Int64)UserTime / (double)(Int64)userFreq;
  const double invWallSeconds = (double)(Int64)GlobalFreq / (double)(Int64)globalTime;

  return Get_UInt64_from_double(
      cpuSeconds * invWallSeconds * (double)(Int64)kBenchmarkUsageMult);
  /*
  return MyMultDiv64(
  MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
  globalFreq, globalTime);
  */
}
// Returns (rating) normalized to 100% usage of one core:
//   rating * (GlobalTime / GlobalFreq) / (UserTime / UserFreq)
// Returns 0, if no CPU time was measured (UserTime == 0).
// Zero GlobalFreq is replaced with 1.
UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
{
  if (UserTime == 0)
  {
    return 0;
    // userTime = 1;
  }

  const UInt64 globalFreq = (GlobalFreq == 0) ? 1 : GlobalFreq;

  const double wallSeconds = (double)(Int64)GlobalTime / (double)(Int64)globalFreq;
  const double invCpuSeconds = (double)(Int64)UserFreq / (double)(Int64)UserTime;

  return Get_UInt64_from_double(
      wallSeconds * invCpuSeconds * (double)(Int64)rating);
  /*
  return MyMultDiv64(
  MyMultDiv64(rating, UserFreq, UserTime),
  GlobalTime, globalFreq);
  */
}
// Returns the number of units processed per second of wall-clock time:
//   numUnits * GlobalFreq / GlobalTime
UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
{
  const UInt64 unitsPerSecond = MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
  return unitsPerSecond;
}
// Converts a data size to the estimated number of CPU commands.
//   complexity >= 0 : commands per byte        -> size * complexity
//   complexity <  0 : bytes per command        -> size / (-complexity)
static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
{
  if (complexity < 0)
    return size / (UInt32)(-complexity);
  return size * (UInt32)complexity;
}
// Complexity model of one benchmarked method. The complexity values are in
// the format of GetNumCommands_from_Size_and_Complexity().
// The constructor doesn't set the complexity fields:
// the owner must set them (for example, with SetLzmaCompexity()).
struct CBenchProps
{
  bool LzmaRatingMode;     // GetRating_Enc() derives complexity from dictionary size
  Int32 EncComplex;        // encoding complexity per byte of unpacked data
  Int32 DecComplexCompr;   // decoding complexity per byte of packed data
  Int32 DecComplexUnc;     // decoding complexity per byte of unpacked data
  unsigned KeySize;

  CBenchProps():
      LzmaRatingMode(false),
      KeySize(0)
    {}

  void SetLzmaCompexity();

  // Estimated number of commands to encode (unpackSize) bytes.
  // Sizes smaller than 100 bytes are counted as 100 bytes.
  UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
  {
    const UInt32 kMinSize = 100;
    const UInt64 effectiveSize = (unpackSize < kMinSize) ? (UInt64)kMinSize : unpackSize;
    return GetNumCommands_from_Size_and_Complexity(effectiveSize, EncComplex);
  }

  // Estimated number of commands to decode (packSize) bytes to (unpackSize) bytes.
  UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
  {
    const UInt64 packPart = GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr);
    const UInt64 unpackPart = GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
    return packPart + unpackPart;
  }

  UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
  UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
};
// Sets the complexity model that is used for LZMA ratings and
// enables the dictionary-dependent encoder rating (LzmaRatingMode).
void CBenchProps::SetLzmaCompexity()
{
  LzmaRatingMode = true;
  EncComplex = 1200;
  DecComplexCompr = 190;
  DecComplexUnc = 4;
}
// Returns the encoder rating: estimated commands per second.
//   dictSize    - dictionary size; values smaller than
//                 (1 << kBenchMinDicLogSize) are raised to that value
//   elapsedTime - elapsed time in units of (1 / freq) second
//   size        - number of encoded (unpacked) bytes
// In LzmaRatingMode the per-byte complexity is not EncComplex:
// it grows with the square of log2 of the dictionary size.
UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
{
  const UInt64 kMinDictSize = (1 << kBenchMinDicLogSize);
  if (dictSize < kMinDictSize)
    dictSize = kMinDictSize;

  Int32 complexity = EncComplex;
  if (LzmaRatingMode)
  {
    /*
    for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
    {
    unsigned rr = GetLogSize_Sub(uu);
    printf("\n%16I64x , log = %4x", uu, rr);
    uu += 1;
    uu += uu / 50;
    }
    */
    // throw 1;
    // t = fixed-point (log2(dictSize) - kBenchMinDicLogSize)
    const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
    complexity = 870 + ((t * t * 5) >> (2 * kSubBits));
  }

  return MyMultDiv64(
      GetNumCommands_from_Size_and_Complexity(size, complexity),
      freq, elapsedTime);
}
// Returns the decoder rating: estimated commands per second.
//   elapsedTime   - elapsed time in units of (1 / freq) second
//   outSize       - unpacked size of one iteration
//   inSize        - packed size of one iteration
//   numIterations - number of decoding iterations
UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
{
  const UInt64 commandsPerPass = GetNumCommands_Dec(inSize, outSize);
  return MyMultDiv64(commandsPerPass * numIterations, freq, elapsedTime);
}
// Returns the LZMA encoder rating of this result for the dictionary
// size (dictSize). Total encoded size is (UnpackSize * NumIterations).
UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
{
  CBenchProps lzmaProps;
  lzmaProps.SetLzmaCompexity();
  const UInt64 totalUnpackSize = UnpackSize * NumIterations;
  return lzmaProps.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, totalUnpackSize);
}
// Returns the LZMA decoder rating of this result.
// UnpackSize and PackSize are the sizes of one iteration.
UInt64 CBenchInfo::GetRating_LzmaDec() const
{
  CBenchProps lzmaProps;
  lzmaProps.SetLzmaCompexity();
  return lzmaProps.GetRating_Dec(
      GlobalTime, GlobalFreq,
      UnpackSize, PackSize,
      NumIterations);
}
#ifndef Z7_ST
#define NUM_CPU_LEVELS_MAX 3
// Describes how benchmark threads are bound to CPUs.
// The CPU set for one bundle of threads is calculated by GetAffinityMask().
// Affinity is used only if (NumBundleThreads != 0).
// NumCores and Sizes[] are set by SetLevels(): they are not
// initialized by the constructor.
struct CAffinityMode
{
  unsigned NumBundleThreads;  // number of adjacent CPUs given to one bundle; 0 = no affinity
  unsigned NumLevels;         // number of used items in Sizes[]
  unsigned NumCoreThreads;
  unsigned NumCores;
  // unsigned DivideNum;
  UInt32 Sizes[NUM_CPU_LEVELS_MAX];

  CAffinityMode():
      NumBundleThreads(0),
      NumLevels(0),
      NumCoreThreads(1)
      // DivideNum(1)
    {}

  void SetLevels(unsigned numCores, unsigned numCoreThreads);
  DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;

  bool NeedAffinity() const { return NumBundleThreads != 0; }

  // Creates (thread). If affinity is used, the thread is created
  // with the CPU set calculated for (bundleIndex).
  WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
  {
    if (!NeedAffinity())
      return thread.Create(startAddress, parameter);

    CCpuSet cpuSet;
    GetAffinityMask(bundleIndex, &cpuSet);
    return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
  }
};
void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
{
NumCores = numCores;
NumCoreThreads = numCoreThreads;
NumLevels = 0;
if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
return;
UInt32 c = numCores / numCoreThreads;
UInt32 c2 = 1;
while ((c & 1) == 0)
{
c >>= 1;
c2 <<= 1;
}
if (c2 != 1)
Sizes[NumLevels++] = c2;
if (c != 1)
Sizes[NumLevels++] = c;
if (numCoreThreads != 1)
Sizes[NumLevels++] = numCoreThreads;
if (NumLevels == 0)
Sizes[NumLevels++] = 1;
/*
printf("\n Cores:");
for (unsigned i = 0; i < NumLevels; i++)
{
printf(" %d", Sizes[i]);
}
printf("\n");
*/
}
// Calculates the CPU set for the bundle of threads (bundleIndex).
//   cpuSet - [out] it is cleared first, and then it gets NumBundleThreads
//            adjacent CPUs starting from the calculated CPU index
// Returns the same set as a bit mask.
// Returns 0 and leaves (cpuSet) empty, if there are no levels
// (SetLevels() was not called or it rejected its arguments),
// or if affinity is not used (NumBundleThreads == 0).
//
// The bundle index is reduced modulo the number of bundles that fit into
// NumCores. Then its digits in the mixed radix Sizes[] are written in
// reversed order, so consecutive bundle indexes are spread over
// different parts of the CPU numbering.
DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
{
  CpuSet_Zero(cpuSet);
  // (NumBundleThreads == 0) must be rejected before the division below
  if (NumLevels == 0 || NumBundleThreads == 0)
    return 0;
  // printf("\n%2d", bundleIndex);
  /*
  UInt32 low = 0;
  if (DivideNum != 1)
  {
  low = bundleIndex % DivideNum;
  bundleIndex /= DivideNum;
  }
  */
  UInt32 numGroups = NumCores / NumBundleThreads;
  // if one bundle is larger than the number of cores, we use single
  // group instead of (bundleIndex % 0)
  if (numGroups == 0)
    numGroups = 1;
  UInt32 m = bundleIndex % numGroups;
  UInt32 v = 0;
  for (unsigned i = 0; i < NumLevels; i++)
  {
    UInt32 size = Sizes[i];
    // the power-of-2 part of the level is processed bit by bit
    while ((size & 1) == 0)
    {
      v *= 2;
      v |= (m & 1);
      m >>= 1;
      size >>= 1;
    }
    v *= size;
    v += m % size;
    m /= size;
  }
  // UInt32 nb = NumBundleThreads / DivideNum;
  UInt32 nb = NumBundleThreads;
  DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
  // v += low;
  mask <<= v;
  // printf(" %2d %8x \n ", v, (unsigned)mask);
  #ifdef _WIN32
    *cpuSet = mask;
  #else
    {
      for (unsigned k = 0; k < nb; k++)
        CpuSet_Set(cpuSet, v + k);
    }
  #endif
  return mask;
}
// State shared by all encoder threads of a multithreaded benchmark.
// CEncoderInfo::Encode() waits on StartEvent after the data generation,
// and then it returns without encoding, if ExitMode is set.
struct CBenchSyncCommon
{
bool ExitMode;
NSynchronization::CManualResetEvent StartEvent;
CBenchSyncCommon(): ExitMode(false) {}
};
#endif
// Selects when CEncoderInfo::Decode() checks CRC of the decoded data.
enum E_CheckCrcMode
{
// CRC is not calculated
k_CheckCrcMode_Never = 0,
// CRC is checked in each iteration
k_CheckCrcMode_Always = 1,
// filter decoding: CRC is checked only in the first iteration.
// Note: for non-filter decoders Decode() handles this value
// the same way as k_CheckCrcMode_Always.
k_CheckCrcMode_FirstPass = 2
};
class CEncoderInfo;

// State of one benchmark item: one encoder (coder or filter), up to two
// decoders for the data produced by that encoder, the data buffers and
// the threads that run Encode() / Decode().
// Most of the fields are not set by the constructor: the owner must set
// them before Generate() / Encode() / Decode() are called.
class CEncoderInfo Z7_final
{
  Z7_CLASS_NO_COPY(CEncoderInfo)

public:

  #ifndef Z7_ST
  NWindows::CThread thread[2];   // [0] is also used for the encoder thread
  NSynchronization::CManualResetEvent ReadyEvent; // it is set by the encoder thread
  UInt32 NumDecoderSubThreads;
  CBenchSyncCommon *Common;      // shared start / exit state; can be NULL
  UInt32 EncoderIndex;           // it is used as bundle index for affinity
  UInt32 NumEncoderInternalThreads;
  CAffinityMode AffinityMode;
  bool IsGlobalMtMode; // if more than one benchmark encoder threads
  #endif

  CMyComPtr<ICompressCoder> _encoder;
  CMyComPtr<ICompressFilter> _encoderFilter;
  CBenchProgressInfo *progressInfoSpec[2];
  CMyComPtr<ICompressProgressInfo> progressInfo[2];
  UInt64 NumIterations;

  UInt32 Salt;   // salt for the random data generator

  #ifdef USE_ALLOCA
  size_t AllocaSize;  // size of the stack block allocated in the encoder thread
  #endif

  unsigned KeySize;
  Byte _key[32];
  Byte _iv[16];

  // Sends the key (KeySize bytes of _key) and the init vector to the coder.
  HRESULT Set_Key_and_IV(ICryptoProperties *cp)
  {
    RINOK(cp->SetKey(_key, KeySize))
    return cp->SetInitVector(_iv, sizeof(_iv));
  }

  Byte _psw[16];

  bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
                           used for filter and usual coders */
  bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
                           used only for filter */
  E_CheckCrcMode CheckCrcMode_Dec;

  // Parameters of one decoder thread.
  struct CDecoderInfo
  {
    CEncoderInfo *Encoder;
    UInt32 DecoderIndex;
    bool CallbackMode;

    #ifdef USE_ALLOCA
    size_t AllocaSize;
    #endif
  };
  CDecoderInfo decodersInfo[2];

  CMyComPtr<ICompressCoder> _decoders[2];
  CMyComPtr<ICompressFilter> _decoderFilter;

  HRESULT Results[2];   // results of thread functions: [0] is also used for the encoder
  CBenchmarkOutStream *outStreamSpec;   // buffer for compressed data
  CMyComPtr<ISequentialOutStream> outStream;
  IBenchCallback *callback;
  IBenchPrintCallback *printCallback;
  UInt32 crc;                 // CRC of uncompressed data
  size_t kBufferSize;         // size of uncompressed data
  size_t compressedSize;
  const Byte *uncompressedDataPtr;

  const Byte *fileData;       // external input data; NULL = generated data is used
  CBenchRandomGenerator rg;

  CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!

  // CBenchmarkOutStream *propStreamSpec;
  Byte propsData[kMaxMethodPropSize];
  CBufPtrSeqOutStream *propStreamSpec;
  CMyComPtr<ISequentialOutStream> propStream;

  unsigned generateDictBits;
  COneMethodInfo _method;

  // for decode
  size_t _uncompressedDataSize;

  HRESULT Generate();
  HRESULT Encode();
  HRESULT Decode(UInt32 decoderIndex);

  CEncoderInfo():
    #ifndef Z7_ST
      Common(NULL),
      IsGlobalMtMode(true),
    #endif
      Salt(0),
      KeySize(0),
      CheckCrc_Enc(true),
      UseRealData_Enc(true),
      CheckCrcMode_Dec(k_CheckCrcMode_Always),
      outStreamSpec(NULL),
      callback(NULL),
      printCallback(NULL),
      fileData(NULL),
      propStreamSpec(NULL)
      {}

  #ifndef Z7_ST

  // Thread function of the encoder: (param) is (CEncoderInfo *).
  // It runs Encode(), stores the result to Results[0], sends an error
  // to the shared status and sets ReadyEvent.
  // Any C++ exception is converted to E_FAIL.
  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
  {
    HRESULT res;
    CEncoderInfo *encoder = (CEncoderInfo *)param;
    try
    {
      #ifdef USE_ALLOCA
      alloca(encoder->AllocaSize);
      #endif

      res = encoder->Encode();
    }
    catch(...)
    {
      res = E_FAIL;
    }
    encoder->Results[0] = res;
    if (res != S_OK)
      encoder->progressInfoSpec[0]->Status->SetResult(res);
    encoder->ReadyEvent.Set();
    return THREAD_FUNC_RET_ZERO;
  }

  // Thread function of a decoder: (param) is (CDecoderInfo *).
  // It runs Decode() and stores the result to Results[DecoderIndex].
  // Decode() allocates objects with (new), so it can throw.
  // An exception must not leave the thread function, so any C++ exception
  // is converted to E_FAIL as in EncodeThreadFunction().
  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
  {
    CDecoderInfo *decoder = (CDecoderInfo *)param;

    #ifdef USE_ALLOCA
    alloca(decoder->AllocaSize);
    #endif

    CEncoderInfo *encoder = decoder->Encoder;
    HRESULT res;
    try
    {
      res = encoder->Decode(decoder->DecoderIndex);
    }
    catch(...)
    {
      res = E_FAIL;
    }
    encoder->Results[decoder->DecoderIndex] = res;
    return THREAD_FUNC_RET_ZERO;
  }

  // Creates ReadyEvent (if it was not created) and starts
  // the encoder thread in thread[0].
  HRESULT CreateEncoderThread()
  {
    WRes res = 0;
    if (!ReadyEvent.IsCreated())
      res = ReadyEvent.Create();
    if (res == 0)
      res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
          EncoderIndex);
    return HRESULT_FROM_WIN32(res);
  }

  // Fills decodersInfo[index] and starts the decoder thread in thread[index].
  HRESULT CreateDecoderThread(unsigned index, bool callbackMode
      #ifdef USE_ALLOCA
      , size_t allocaSize
      #endif
      )
  {
    CDecoderInfo &decoder = decodersInfo[index];
    decoder.DecoderIndex = index;
    decoder.Encoder = this;

    #ifdef USE_ALLOCA
    decoder.AllocaSize = allocaSize;
    #endif

    decoder.CallbackMode = callbackMode;

    WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
        // EncoderIndex * NumEncoderInternalThreads + index
        EncoderIndex
        );

    return HRESULT_FROM_WIN32(res);
  }

  #endif
};
// Returns the size of the output buffer for compressed data:
// the data size plus 1/16 of it plus kCompressedAdditionalSize.
// The result can wrap around for huge (bufferSize):
// the caller must check that it is not smaller than (bufferSize).
static size_t GetBenchCompressedSize(size_t bufferSize)
{
  const size_t reserve = bufferSize / 16;
  return bufferSize + reserve + kCompressedAdditionalSize;
  // kBufferSize / 2;
}
// Prepares everything that is required for Encode():
//   1) input data: external (fileData) or generated random / LZ data
//   2) output buffer for compressed data
//   3) coder properties: they are sent to the encoder, and the properties
//      written by the encoder are saved to (propsData) for decoders
//   4) password: it is set, and one small encoding pass is executed
// Returns S_OK or error code (E_OUTOFMEMORY, E_INVALIDARG, E_FAIL or
// the error returned by the coder).
HRESULT CEncoderInfo::Generate()
{
const COneMethodInfo &method = _method;
// we need extra space, if input data is already compressed
const size_t kCompressedBufferSize = _encoderFilter ?
kBufferSize :
GetBenchCompressedSize(kBufferSize);
// it's the check for overflow in GetBenchCompressedSize()
if (kCompressedBufferSize < kBufferSize)
return E_FAIL;
uncompressedDataPtr = fileData;
if (fileData)
{
// external data: (crc) is not calculated here
#if !defined(Z7_ST)
if (IsGlobalMtMode)
{
/* we copy the data to local buffer of thread to eliminate
using of shared buffer by different threads */
ALLOC_WITH_HRESULT(&rg, kBufferSize)
memcpy((Byte *)rg, fileData, kBufferSize);
uncompressedDataPtr = (const Byte *)rg;
}
#endif
}
else
{
// generated data: (generateDictBits == 0) means simple random data
ALLOC_WITH_HRESULT(&rg, kBufferSize)
// DWORD ttt = GetTickCount();
if (generateDictBits == 0)
rg.GenerateSimpleRandom(Salt);
else
{
if (generateDictBits >= sizeof(size_t) * 8
&& kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
return E_INVALIDARG;
rg.GenerateLz(generateDictBits, Salt);
// return E_ABORT; // for debug
}
// printf("\n%d\n ", GetTickCount() - ttt);
// Decode() compares CRC of decoded data with that value
crc = CrcCalc((const Byte *)rg, rg.Size());
uncompressedDataPtr = (const Byte *)rg;
}
// the stream object is created only once; the buffer is allocated in each call
if (!outStream)
{
outStreamSpec = new CBenchmarkOutStream;
outStream = outStreamSpec;
}
ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
if (_encoderFilter)
{
/* we try to reduce the number of memcpy() in main encoding loop.
so we copy data to temp buffers here */
ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
}
if (!propStream)
{
propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
propStream = propStreamSpec;
}
// ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
// propStreamSpec->Init(true, false);
// coder properties will be written to propsData[]
propStreamSpec->Init(propsData, sizeof(propsData));
// (coder) is the object that we ask for optional interfaces: filter or coder
CMyComPtr<IUnknown> coder;
if (_encoderFilter)
coder = _encoderFilter;
else
coder = _encoder;
{
CMyComPtr<ICompressSetCoderProperties> scp;
coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
if (scp)
{
const UInt64 reduceSize = kBufferSize;
/* in posix new thread uses same affinity as parent thread,
so we don't need to send affinity to coder in posix */
UInt64 affMask;
#if !defined(Z7_ST) && defined(_WIN32)
{
CCpuSet cpuSet;
affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
}
#else
affMask = 0;
#endif
// affMask <<= 3; // debug line: to test no affinity in coder;
// affMask = 0;
// zero mask means "no affinity": NULL is sent to the coder in that case
RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
}
else
{
// the coder can't accept properties, so the method must not require them
if (method.AreThereNonOptionalProps())
return E_INVALIDARG;
}
CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
if (writeCoderProps)
{
RINOK(writeCoderProps->WriteCoderProperties(propStream))
}
{
CMyComPtr<ICryptoSetPassword> sp;
coder.QueryInterface(IID_ICryptoSetPassword, &sp);
if (sp)
{
RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
// we must call encoding one time to calculate password key for key cache.
// it must be after WriteCoderProperties!
// 16 zero bytes are encoded for that; the output is ignored
Byte temp[16];
memset(temp, 0, sizeof(temp));
if (_encoderFilter)
{
_encoderFilter->Init();
_encoderFilter->Filter(temp, sizeof(temp));
}
else
{
CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
inStreamSpec->Init(temp, sizeof(temp));
CCrcOutStream *crcStreamSpec = new CCrcOutStream;
CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
crcStreamSpec->Init();
RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
}
}
}
}
return S_OK;
}
// Filters data[0 .. size) in place with (filter), block by block.
//   crc - if it is not NULL, (*crc) is updated with the filtered data
// The block size is 128 KiB, if CRC is calculated, and 16 MiB otherwise.
static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
{
  const UInt32 kBlockSize = crc ? 1 << 17 : 1 << 24;

  for (; size != 0;)
  {
    UInt32 cur = kBlockSize;
    if (cur > size)
      cur = (UInt32)size;

    UInt32 done = filter->Filter(data, cur);
    /* if (done > size) (in AES filter), we must fill last block with zeros.
       but it is not important for benchmark. So we just copy that data without filtering.
       if (done == 0) then filter can't process more */
    if (done == 0 || done > size)
      done = (UInt32)size;

    if (crc)
      *crc = CrcUpdate(*crc, data, done);

    size -= done;
    data += done;
  }
}
// Encoder benchmark: it prepares the data with Generate() and then
// encodes the same data NumIterations times.
// It checks that compressed size and (optionally) CRC of compressed data
// are the same in all checked iterations.
// UnpackSize / PackSize of progressInfoSpec[0]->BenchInfo get the totals
// of processed data. The encoder objects are released at the end.
// Returns S_OK, error code of Generate() / coder / CheckBreak(), or E_FAIL
// if some consistency check failed.
HRESULT CEncoderInfo::Encode()
{
// printf("\nCEncoderInfo::Generate\n");
RINOK(Generate())
// printf("\n2222\n");
#ifndef Z7_ST
if (Common)
{
// synchronized mode: we report that the data is ready, and we wait
// for the common start event. The timer is not started here.
Results[0] = S_OK;
WRes wres = ReadyEvent.Set();
if (wres == 0)
wres = Common->StartEvent.Lock();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
// the run was canceled: nothing is encoded
if (Common->ExitMode)
return S_OK;
}
else
#endif
{
// standalone mode: the timer starts after data generation
CBenchProgressInfo *bpi = progressInfoSpec[0];
bpi->SetStartTime();
}
CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
bi.UnpackSize = 0;
bi.PackSize = 0;
CMyComPtr<ICryptoProperties> cp;
CMyComPtr<IUnknown> coder;
if (_encoderFilter)
coder = _encoderFilter;
else
coder = _encoder;
coder.QueryInterface(IID_ICryptoProperties, &cp);
CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
if (cp)
{
RINOK(Set_Key_and_IV(cp))
}
// a filter doesn't change the size of data.
// For usual coder the size is set after the first iteration.
compressedSize = 0;
if (_encoderFilter)
compressedSize = kBufferSize;
// CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
UInt64 prev = 0;
// CRC is calculated in iterations, where ((i & mask) == 0):
//   CheckCrc_Enc : each iteration
//   otherwise    : each 0x10000-th iteration, and only
//                  if (NumIterations > 0xFFFF)
const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
const bool useCrc = (mask < NumIterations);
bool crcPrev_defined = false;
UInt32 crcPrev = 0;
bool useRealData_Enc = UseRealData_Enc;
bool data_Was_Changed = false;
if (useRealData_Enc)
{
/* we want memcpy() for each iteration including first iteration.
So results will be equal for different number of iterations */
data_Was_Changed = true;
}
const UInt64 numIterations = NumIterations;
// (i) counts down: it is (numIterations - 1) in the first iteration
// and 0 in the last iteration
UInt64 i = numIterations;
// printCallback->NewLine();
while (i != 0)
{
i--;
// we check for break request after each 64 MiB of processed data
if (printCallback && bi.UnpackSize - prev >= (1 << 26))
{
prev = bi.UnpackSize;
RINOK(printCallback->CheckBreak())
}
/*
CBenchInfo info;
progressInfoSpec[0]->SetStartTime();
*/
bool calcCrc = false;
if (useCrc)
calcCrc = (((UInt32)i & mask) == 0);
if (_encoderFilter)
{
// by default the filter works with (rgCopy), that can contain
// already filtered data from previous iterations.
// The first iteration, CRC iterations and useRealData_Enc mode
// work with original data in the buffer of (outStreamSpec).
Byte *filterData = rgCopy;
if (i == numIterations - 1 || calcCrc || useRealData_Enc)
{
filterData = (Byte *)*outStreamSpec;
// Generate() already copied original data to that buffer,
// so we can skip first memcpy(), if (!useRealData_Enc)
if (data_Was_Changed)
memcpy(filterData, uncompressedDataPtr, kBufferSize);
data_Was_Changed = true;
}
_encoderFilter->Init();
if (calcCrc)
outStreamSpec->InitCrc();
My_FilterBench(_encoderFilter, filterData, kBufferSize,
calcCrc ? &outStreamSpec->Crc : NULL);
}
else
{
outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
// the encoder must read all input data
if (!inStreamSpec->WasFinished())
return E_FAIL;
// compressed size must be the same in all iterations
if (compressedSize != outStreamSpec->Pos)
{
if (compressedSize != 0)
return E_FAIL;
compressedSize = outStreamSpec->Pos;
}
}
// outStreamSpec->Print();
if (calcCrc)
{
// CRC of compressed data must be the same in all checked iterations
const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
if (crcPrev_defined && crcPrev != crc2)
return E_FAIL;
crcPrev = crc2;
crcPrev_defined = true;
}
bi.UnpackSize += kBufferSize;
bi.PackSize += compressedSize;
/*
{
progressInfoSpec[0]->SetFinishTime(info);
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = 1;
info.UnpackSize = kBufferSize;
info.PackSize = compressedSize;
// printf("\n%7d\n", encoder.compressedSize);
RINOK(callback->SetEncodeResult(info, true))
printCallback->NewLine();
}
*/
}
// the encoder is not required after that
_encoder.Release();
_encoderFilter.Release();
return S_OK;
}
// Decoder benchmark: it decodes the data produced by Encode()
// (the buffer of outStreamSpec, compressedSize bytes) NumIterations times
// with the decoder (decoderIndex) or with the decoder filter.
// The decoded data is not stored: only the size and (optionally) CRC
// are checked.
// UnpackSize / PackSize of progressInfoSpec[decoderIndex]->BenchInfo get
// the totals of processed data. The decoder objects are released at the end.
// Returns:
//   S_OK    - OK
//   S_FALSE - decoded data is wrong (size, CRC, or input was not fully used)
//   E_FAIL  - the decoder can't be used for that data
//   or error code of decoder / CheckBreak()
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
CMyComPtr<IUnknown> coder;
if (_decoderFilter)
{
// there is only one filter object, so only index 0 is allowed for filter
if (decoderIndex != 0)
return E_FAIL;
coder = _decoderFilter;
}
else
coder = decoder;
CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
// the encoder wrote properties, but the decoder can't accept them
if (!setDecProps && propStreamSpec->GetPos() != 0)
return E_FAIL;
CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
pi->BenchInfo.UnpackSize = 0;
pi->BenchInfo.PackSize = 0;
#ifndef Z7_ST
{
CMyComPtr<ICompressSetCoderMt> setCoderMt;
coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
if (setCoderMt)
{
RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
}
}
#endif
CMyComPtr<ICompressSetCoderProperties> scp;
coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
if (scp)
{
const UInt64 reduceSize = _uncompressedDataSize;
RINOK(_method.SetCoderProps(scp, &reduceSize))
}
CMyComPtr<ICryptoProperties> cp;
coder.QueryInterface(IID_ICryptoProperties, &cp);
if (setDecProps)
{
// we send the properties that were saved to propsData[] in Generate()
RINOK(setDecProps->SetDecoderProperties2(
/* (const Byte *)*propStreamSpec, */
propsData,
(UInt32)propStreamSpec->GetPos()))
}
{
CMyComPtr<ICryptoSetPassword> sp;
coder.QueryInterface(IID_ICryptoSetPassword, &sp);
if (sp)
{
RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
}
}
UInt64 prev = 0;
if (cp)
{
RINOK(Set_Key_and_IV(cp))
}
CMyComPtr<ICompressSetFinishMode> setFinishMode;
if (_decoderFilter)
{
// filter mode can copy (compressedSize) bytes to (rgCopy)
if (compressedSize > rgCopy.Size())
return E_FAIL;
}
else
{
decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
}
const UInt64 numIterations = NumIterations;
const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;
for (UInt64 i = 0; i < numIterations; i++)
{
// we check for break request after each 64 MiB of processed data
if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
{
RINOK(printCallback->CheckBreak())
prev = pi->BenchInfo.UnpackSize;
}
const UInt64 outSize = kBufferSize;
bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
crcOutStreamSpec->Init();
if (_decoderFilter)
{
// the filter works in place. By default it works directly in the
// buffer of (outStreamSpec), so the data in that buffer is
// changed by each iteration.
Byte *filterData = (Byte *)*outStreamSpec;
if (calcCrc)
{
// FirstPass mode: CRC is checked only for first iteration,
// where the buffer still contains encoded data
calcCrc = (i == 0);
if (checkCrcMode == k_CheckCrcMode_Always)
{
// Always mode: each iteration decodes a fresh copy of
// encoded data in (rgCopy), and the source buffer is not changed
calcCrc = true;
memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
filterData = rgCopy;
}
}
_decoderFilter->Init();
My_FilterBench(_decoderFilter, filterData, compressedSize,
calcCrc ? &crcOutStreamSpec->Crc : NULL);
}
else
{
crcOutStreamSpec->CalcCrc = calcCrc;
inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
if (setFinishMode)
{
RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
}
RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
// if the decoder supports finish mode, we also check
// that it used exactly all compressed data
if (setFinishMode)
{
if (!inStreamSpec->WasFinished())
return S_FALSE;
CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
if (getInStreamProcessedSize)
{
UInt64 processed;
RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
if (processed != compressedSize)
return S_FALSE;
}
}
// the decoder must produce exactly (kBufferSize) bytes
if (crcOutStreamSpec->Pos != outSize)
return S_FALSE;
}
// CRC of decoded data must be equal to CRC of original data
if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
return S_FALSE;
pi->BenchInfo.UnpackSize += kBufferSize;
pi->BenchInfo.PackSize += compressedSize;
}
// the decoder is not required after that
decoder.Release();
_decoderFilter.Release();
return S_OK;
}
static const UInt32 kNumThreadsMax = (1 << 12);

// Owner of the array of CEncoderInfo items.
// The array of (num) items is allocated in the constructor
// and it is deleted in the destructor.
struct CBenchEncoders
{
  CEncoderInfo *encoders;

  CBenchEncoders(UInt32 num): encoders(new CEncoderInfo[num]) {}
  ~CBenchEncoders()
  {
    delete [] encoders;
  }
};
// Returns the number of iterations that fits into the budget (complexInCommands),
// if one iteration costs (numCommands) commands.
// (numCommands) smaller than 16 is processed as 16.
// The result is never smaller than 1.
static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
{
  const UInt64 kMinCommands = (UInt64)1 << 4;
  const UInt64 divisor = (numCommands < kMinCommands) ? kMinCommands : numCommands;
  const UInt64 iterations = complexInCommands / divisor;
  if (iterations == 0)
    return 1;
  return iterations;
}
#ifndef Z7_ST
// ---------- CBenchThreadsFlusher ----------
struct CBenchThreadsFlusher
{
CBenchEncoders *EncodersSpec;
CBenchSyncCommon Common;
unsigned NumThreads;
bool NeedClose;
CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
~CBenchThreadsFlusher()
{
StartAndWait(true);
}
WRes StartAndWait(bool exitMode = false);
};
// Sets (Common.ExitMode), signals (Common.StartEvent), and then waits and
// closes the thread[0] of each of the first (NumThreads) encoders.
// It does nothing and returns 0, if (NeedClose) is false.
// Returns the first non-zero error code (start event first, then the waits).
WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
{
  if (!NeedClose)
    return 0;
  Common.ExitMode = exitMode;
  WRes firstError = Common.StartEvent.Set();
  for (unsigned k = 0; k != NumThreads; k++)
  {
    NWindows::CThread &th = EncodersSpec->encoders[k].thread[0];
    if (!th.IsCreated())
      continue;
    const WRes waitRes = th.Wait_Close();
    if (firstError == 0)
      firstError = waitRes;
  }
  NeedClose = false;
  return firstError;
}
#endif // Z7_ST
// Fills data[0 .. size) with the low bytes of the sequence
// startValue, startValue + 1, startValue + 2, ...
static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
{
  size_t pos = 0;
  while (pos != size)
  {
    data[pos++] = (Byte)startValue;
    startValue++;
  }
}
// Benchmarks one method (method2):
//   1) creates (numEncoderThreads) CEncoderInfo items with encoder and decoder objects,
//   2) runs the encode pass (in threads, if mtEncMode) and reports it
//      with callback->SetEncodeResult(),
//   3) runs the decode pass (in threads, if mtDecoderMode) and reports it
//      with callback->SetDecodeResult().
// fileData: if it's not NULL, it is used as the data to compress,
//   and its CRC is used for checking.
// Returns:
//   E_NOTIMPL     - the method or its coder was not found / not created
//   E_INVALIDARG  - the method uses more than one stream
//   other errors  - from coder creation, threads, callbacks, Encode() / Decode()
static HRESULT MethodBench(
DECL_EXTERNAL_CODECS_LOC_VARS
UInt64 complexInCommands,
#ifndef Z7_ST
bool oldLzmaBenchMode,
UInt32 numThreads,
const CAffinityMode *affinityMode,
#endif
const COneMethodInfo &method2,
size_t uncompressedDataSize,
const Byte *fileData,
unsigned generateDictBits,
IBenchPrintCallback *printCallback,
IBenchCallback *callback,
CBenchProps *benchProps)
{
// we work with the local copy, because the props can be changed below
COneMethodInfo method = method2;
UInt64 methodId;
UInt32 numStreams;
bool isFilter;
const int codecIndex = FindMethod_Index(
EXTERNAL_CODECS_LOC_VARS
method.MethodName, true,
methodId, numStreams, isFilter);
if (codecIndex < 0)
return E_NOTIMPL;
if (numStreams != 1)
return E_INVALIDARG;
UInt32 numEncoderThreads = 1;
UInt32 numSubDecoderThreads = 1;
#ifndef Z7_ST
numEncoderThreads = numThreads;
// old LZMA benchmark mode: if LZMA encoder uses more than one thread,
// we create ((numThreads + 1) / 2) encoders, and 2 decoders for each encoder.
if (oldLzmaBenchMode)
if (methodId == k_LZMA)
{
if (numThreads == 1 && method.Get_NumThreads() < 0)
method.AddProp_NumThreads(1);
const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
if (numThreads > 1 && numLzmaThreads > 1)
{
numEncoderThreads = (numThreads + 1) / 2; // 20.03
numSubDecoderThreads = 2;
}
}
const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();
#endif
CBenchEncoders encodersSpec(numEncoderThreads);
CEncoderInfo *encoders = encodersSpec.encoders;
UInt32 i;
// ---------- create coders and set check modes ----------
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
// only the first encoder reports to (callback)
encoder.callback = (i == 0) ? callback : NULL;
encoder.printCallback = printCallback;
#ifndef Z7_ST
encoder.EncoderIndex = i;
encoder.NumEncoderInternalThreads = numSubDecoderThreads;
encoder.AffinityMode = *affinityMode;
/*
if (numSubDecoderThreads > 1)
if (encoder.AffinityMode.NeedAffinity()
&& encoder.AffinityMode.NumBundleThreads == 1)
{
// if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
encoder.AffinityMode.NumBundleThreads *= 2;
}
*/
#endif
{
CCreatedCoder cod;
RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
encoder._encoder = cod.Coder;
if (!encoder._encoder && !encoder._encoderFilter)
return E_NOTIMPL;
}
// fixed pseudo-random values for IV, key and password
SetPseudoRand(encoder._iv, sizeof(encoder._iv), 17);
SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
CCreatedCoder cod;
CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
decoder = cod.Coder;
if (!encoder._decoderFilter && !decoder)
return E_NOTIMPL;
}
// real data and CRC check in encoder are used only if (EncComplex > 30)
encoder.UseRealData_Enc =
encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;
// decoder checks CRC in each pass, except of the case of
// small decoding complexity (sum <= 30), where only the first pass is checked
encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
if (benchProps->DecComplexCompr +
benchProps->DecComplexUnc <= 30)
encoder.CheckCrcMode_Dec =
k_CheckCrcMode_FirstPass; // for filters
// k_CheckCrcMode_Never; // for debug
// k_CheckCrcMode_Always; // for debug
// for file data we always use real data and full CRC check
if (fileData)
{
encoder.UseRealData_Enc = true;
encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
}
}
UInt32 crc = 0;
if (fileData)
crc = CrcCalc(fileData, uncompressedDataSize);
// ---------- set data parameters for each encoder ----------
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
encoder._method = method;
encoder.generateDictBits = generateDictBits;
encoder._uncompressedDataSize = uncompressedDataSize;
encoder.kBufferSize = uncompressedDataSize;
encoder.fileData = fileData;
encoder.crc = crc;
}
// (status) is shared by all CBenchProgressInfo objects created below
CBenchProgressStatus status;
status.Res = S_OK;
status.EncodeMode = true;
#ifndef Z7_ST
// the destructor of (encoderFlusher) releases waiting threads in exit mode,
// if we return from this function before StartAndWait() call
CBenchThreadsFlusher encoderFlusher;
if (mtEncMode)
{
WRes wres = encoderFlusher.Common.StartEvent.Create();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
encoderFlusher.NumThreads = numEncoderThreads;
encoderFlusher.EncodersSpec = &encodersSpec;
encoderFlusher.NeedClose = true;
}
#endif
// ---------- Encode ----------
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
// encoder.NumIterations = 3;
encoder.Salt = g_CrcTable[i & 0xFF];
encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
// (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
// printf(" %8x", encoder.Salt);
encoder.KeySize = benchProps->KeySize;
// two progress objects per encoder: one for each possible decoder slot
for (int j = 0; j < 2; j++)
{
CBenchProgressInfo *spec = new CBenchProgressInfo;
encoder.progressInfoSpec[j] = spec;
encoder.progressInfo[j] = spec;
spec->Status = &status;
}
if (i == 0)
{
CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
bpi->Callback = callback;
bpi->BenchInfo.NumIterations = numEncoderThreads;
}
#ifndef Z7_ST
if (mtEncMode)
{
#ifdef USE_ALLOCA
// different stack offsets for different threads
encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
#endif
encoder.Common = &encoderFlusher.Common;
encoder.IsGlobalMtMode = numEncoderThreads > 1;
RINOK(encoder.CreateEncoderThread())
}
#endif
}
if (printCallback)
{
RINOK(printCallback->CheckBreak())
}
#ifndef Z7_ST
if (mtEncMode)
{
// we wait until each encoder thread reports that it's ready,
// and we check the result of its preparation stage
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
const WRes wres = encoder.ReadyEvent.Lock();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
RINOK(encoder.Results[0])
}
// the time measurement starts right before the threads are released
CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
bpi->SetStartTime();
const WRes wres = encoderFlusher.StartAndWait();
if (status.Res == 0 && wres != 0)
return HRESULT_FROM_WIN32(wres);
}
else
#endif
{
RINOK(encoders[0].Encode())
}
RINOK(status.Res)
CBenchInfo info;
encoders[0].progressInfoSpec[0]->SetFinishTime(info);
// the sizes are the sums over all encoders
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = encoders[0].NumIterations;
for (i = 0; i < numEncoderThreads; i++)
{
const CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
// printf("\n%7d\n", encoder.compressedSize);
}
RINOK(callback->SetEncodeResult(info, true))
// ---------- Decode ----------
status.Res = S_OK;
status.EncodeMode = false;
const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
#ifndef Z7_ST
const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
#endif
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
/*
#ifndef Z7_ST
// encoder.affinityMode = *affinityMode;
if (encoder.NumEncoderInternalThreads != 1)
encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
#endif
*/
// the number of iterations is calculated from the sizes of the first encoder,
// and the same number is used for all other encoders
if (i == 0)
{
encoder.NumIterations = GetNumIterations(
benchProps->GetNumCommands_Dec(
encoder.compressedSize,
encoder.kBufferSize),
complexInCommands);
CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
bpi->Callback = callback;
bpi->BenchInfo.NumIterations = numDecoderThreads;
bpi->SetStartTime();
}
else
encoder.NumIterations = encoders[0].NumIterations;
#ifndef Z7_ST
{
int numSubThreads = method.Get_NumThreads();
encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
}
if (mtDecoderMode)
{
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
#ifdef USE_ALLOCA
, ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
#endif
);
RINOK(res)
}
}
else
#endif
{
RINOK(encoder.Decode(0))
}
}
#ifndef Z7_ST
if (mtDecoderMode)
{
// we wait for all decoder threads, and we keep the first error of each type
WRes wres = 0;
HRESULT res = S_OK;
for (i = 0; i < numEncoderThreads; i++)
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
CEncoderInfo &encoder = encoders[i];
const WRes wres2 = encoder.thread[j].
// Wait(); // later we can get thread times from thread in UNDER_CE
Wait_Close();
if (wres == 0 && wres2 != 0)
wres = wres2;
const HRESULT res2 = encoder.Results[j];
if (res == 0 && res2 != 0)
res = res2;
}
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
RINOK(res)
}
#endif // Z7_ST
RINOK(status.Res)
encoders[0].progressInfoSpec[0]->SetFinishTime(info);
/*
#ifndef Z7_ST
#ifdef UNDER_CE
if (mtDecoderMode)
for (i = 0; i < numEncoderThreads; i++)
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
FILETIME creationTime, exitTime, kernelTime, userTime;
if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
}
#endif
#endif
*/
info.UnpackSize = 0;
info.PackSize = 0;
// each encoder data was decoded by (numSubDecoderThreads) decoders
info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
for (i = 0; i < numEncoderThreads; i++)
{
const CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
}
// RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
RINOK(callback->SetDecodeResult(info, true))
return S_OK;
}
// Returns (2 ^ dictSizeLog) as 64-bit value.
static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
{
  /*
  if (dictSizeLog < 32)
    return (UInt32)1 << dictSizeLog;
  else
    return (UInt32)(Int32)-1;
  */
  const UInt64 one = 1;
  return one << dictSizeLog;
}
// This is the limit of the current LZMA implementation; it can be changed later.
// (15 << 28) == 0xF0000000 == 3840 MiB
#define kLzmaMaxDictSize ((UInt32)15 << 28)
// Returns the estimated memory usage (in bytes) of LZMA encoder
// for dictionary size (dict).
//   multiThread : adds the memory for multithreaded mode
//   btMode      : non-zero doubles the (son) part of the estimation
// NOTE(review): the formula presumably mirrors the allocation code of
// the LZMA encoder / match finder - confirm against that code.
static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
{
// (dict) is clamped to the range [1, kLzmaMaxDictSize]
if (dict == 0)
dict = 1;
if (dict > kLzmaMaxDictSize)
dict = kLzmaMaxDictSize;
// (hs) : the number of hash items.
// The shifts below propagate the high bit of (dict - 1) to lower bits,
// so we get the mask value derived from the dictionary size.
UInt32 hs = (UInt32)dict - 1;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
hs >>= 1;
hs |= 0xFFFF;
if (hs > (1 << 24))
hs >>= 1;
hs++;
hs += (1 << 16);
// (blockSize) : dictionary + reserve, increased by 1/2 (or by 1/4 for
// big sizes), and limited by (2^32 - 2^16)
const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
UInt64 blockSize = (UInt64)dict + (1 << 16)
+ (multiThread ? (1 << 20) : 0);
blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
if (blockSize >= kBlockSizeMax)
blockSize = kBlockSizeMax;
// (son) : one item per dictionary byte, and two items in (btMode)
UInt64 son = (UInt64)dict;
if (btMode)
son *= 2;
// hash and son items are counted as 4 bytes each.
// We also add 1 MiB, and additional 6 MiB for multithreaded mode.
const UInt64 v = (hs + son) * 4 + blockSize +
(1 << 20) + (multiThread ? (6 << 20) : 0);
// printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
// printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
return v;
}
// Returns the estimated memory usage (in bytes) of LZMA benchmark.
//   level < 0 is processed as level 5.
//   levels 5 and bigger use (btMode), that is required for multithreaded LZMA.
// In non-total benchmark with multithreaded LZMA, each "big" thread is
// a pair of threads, so the number of big threads is (numThreads / 2).
UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
{
  const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
  const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;

  if (level < 0)
    level = 5;
  const int btMode = (level >= 5) ? 1 : 0;
  const bool lzmaMt = totalBench || (numThreads > 1 && btMode != 0);

  UInt32 numBigThreads = numThreads;
  if (btMode != 0 && !totalBench && lzmaMt)
    numBigThreads /= 2;

  const UInt64 perThread = (UInt64)kBufferSize + kCompressedBufferSize +
      GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20);
  return perThread * numBigThreads;
}
// Returns the estimated memory usage (in bytes) of hash benchmark:
// (dictionary + 32 KiB) for each thread, and additional 2 MiB.
static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
{
  // dictionary += (dictionary >> 9); // for page tables (virtual memory)
  const UInt64 perThread = (UInt64)(dictionary + (1 << 15));
  return perThread * numThreads + (2 << 20);
}
// ---------- CRC and HASH ----------
// Data and state of hash benchmark for one thread.
struct CCrcInfo_Base
{
// local buffer; it's used if there is no external data or if (CreateLocalBuf)
CMidAlignedBuffer Buffer;
// the data to hash; it's set by Generate()
const Byte *Data;
// the size of (Data) in bytes; it's set by Generate()
size_t Size;
// true : Generate() copies external data to (Buffer)
bool CreateLocalBuf;
// the result checksum of CrcProcess()
UInt32 CheckSum_Res;
CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
HRESULT Generate(const Byte *data, size_t size);
HRESULT CrcProcess(UInt64 numIterations,
const UInt32 *checkSum, IHasher *hf,
IBenchPrintCallback *callback);
};
// Prepares (Data, Size) for CrcProcess().
//   data == NULL : (Buffer) is allocated and filled by RandGen()
//   data != NULL : if (CreateLocalBuf), the data is copied to (Buffer);
//                  otherwise (Data) points to the caller's data.
// Allocation failure is handled by ALLOC_WITH_HRESULT.
HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
{
  Size = size;
  Data = data;
  const bool useOwnBuffer = (data == NULL) || CreateLocalBuf;
  if (useOwnBuffer)
  {
    ALLOC_WITH_HRESULT(&Buffer, size)
    Data = Buffer;
  }
  if (data == NULL)
  {
    RandGen(Buffer, size);
    return S_OK;
  }
  if (CreateLocalBuf && size != 0)
    memcpy(Buffer, data, size);
  return S_OK;
}
// Calculates the hash of (Data, Size) with hasher (hf) (numIterations) times.
// The digest of each iteration is folded to 32-bit (sum).
//   checkSum != NULL : (sum) of each iteration must be equal to (*checkSum)
//   checkSum == NULL : (sum) of the first iteration is used as the reference
//                      value for the next iterations
// (CheckSum_Res) is set to the reference value calculated here; it's 0,
// if (checkSum) was specified by the caller or if there were no iterations.
// Returns S_FALSE, if the digest size is larger than the local buffer
// or if there is checksum mismatch. It also returns the error of callback->CheckBreak().
HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
const UInt32 *checkSum, IHasher *hf,
IBenchPrintCallback *callback)
{
MY_ALIGN(16)
Byte hash[64];
memset(hash, 0, sizeof(hash));
CheckSum_Res = 0;
const UInt32 hashSize = hf->GetDigestSize();
if (hashSize > sizeof(hash))
return S_FALSE;
const Byte *buf = Data;
const size_t size = Size;
UInt32 checkSum_Prev = 0;
// (cur) : the number of processed bytes;
// (prev) : the value of (cur) at the last CheckBreak() call
UInt64 prev = 0;
UInt64 cur = 0;
for (UInt64 i = 0; i < numIterations; i++)
{
hf->Init();
size_t pos = 0;
// Update() gets 32-bit size, so we call it for blocks of up to (1 << 31) bytes.
// do-while : Update() is called once also for (size == 0).
do
{
const size_t rem = size - pos;
const UInt32 kStep = ((UInt32)1 << 31);
const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
hf->Update(buf + pos, curSize);
pos += curSize;
}
while (pos != size);
hf->Final(hash);
// we fold the digest to 32-bit value: rotate by 11 bits and add each 32-bit word.
// (hash) was zero-filled, so reading of the full last word is safe.
UInt32 sum = 0;
for (UInt32 j = 0; j < hashSize; j += 4)
{
sum = rotlFixed(sum, 11);
sum += GetUi32(hash + j);
}
if (checkSum)
{
if (sum != *checkSum)
return S_FALSE;
}
else
{
// the first iteration sets the reference value for the next iterations
checkSum_Prev = sum;
checkSum = &checkSum_Prev;
}
if (callback)
{
// we call CheckBreak() after each (1 << 30) bytes (1 GiB) of processed data
cur += size;
if (cur - prev >= ((UInt32)1 << 30))
{
prev = cur;
RINOK(callback->CheckBreak())
}
}
}
CheckSum_Res = checkSum_Prev;
return S_OK;
}
extern
UInt32 g_BenchCpuFreqTemp; // we need a non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;
// YY1 contains 2 dependent operations on (sum).
// Each next macro repeats the previous macro 4 times,
// so YY7 contains (4 * 4 * 4 = 64) YY1 items = 128 operations.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// the number of operations in one iteration of the loop in CountCpuFreq() (YY7)
static const UInt32 kNumFreqCommands = 128;
EXTERN_C_BEGIN
// Executes the YY7 chain (num) times, starting from (sum),
// and returns the final value of (sum).
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
for (UInt32 i = 0; i < num; i++)
{
YY7
}
return sum;
}
EXTERN_C_END
#ifndef Z7_ST
// Common part of per-thread benchmark items: the thread object,
// the callback for break checking, and the result of that callback.
struct CBaseThreadInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback;
  HRESULT CallbackRes;

  // Waits for the thread and closes it, if the thread was created.
  // Returns 0, if the thread was not created.
  WRes Wait_If_Created()
  {
    if (Thread.IsCreated())
      return Thread.Wait_Close();
    return 0;
  }
};
// Per-thread item of CPU frequency benchmark (see FreqThreadFunction()).
struct CFreqInfo: public CBaseThreadInfo
{
// out: the final value of the calculation; it's written by the thread
UInt32 ValRes;
// in: the (num) argument for each CountCpuFreq() call
UInt32 Size;
// in: the number of CountCpuFreq() calls
UInt64 NumIterations;
};
// Thread function of CPU frequency benchmark.
// (param) is the pointer to CFreqInfo.
// It calls CountCpuFreq() (NumIterations) times. If (Callback) is defined,
// CheckBreak() is called before each iteration, its result is stored
// to (CallbackRes), and the loop is stopped, if that result is not S_OK.
// The final value is stored to (ValRes).
static THREAD_FUNC_DECL FreqThreadFunction(void *param)
{
  CFreqInfo *info = (CFreqInfo *)param;
  UInt32 acc = g_BenchCpuFreqTemp;
  UInt64 remaining = info->NumIterations;
  while (remaining != 0)
  {
    if (info->Callback)
    {
      info->CallbackRes = info->Callback->CheckBreak();
      if (info->CallbackRes != S_OK)
        break;
    }
    acc = CountCpuFreq(acc, info->Size, g_BenchCpuFreqTemp);
    remaining--;
  }
  info->ValRes = acc;
  return THREAD_FUNC_RET_ZERO;
}
// Owner of the array of CFreqInfo items.
// The destructor waits for all created threads and deletes the array.
struct CFreqThreads
{
  CFreqInfo *Items;
  UInt32 NumThreads;

  CFreqThreads(): Items(NULL), NumThreads(0) {}

  // Waits for (and closes) the threads of the first (NumThreads) items,
  // and then sets (NumThreads) to 0.
  // Returns the first non-zero error code, or 0.
  WRes WaitAll()
  {
    WRes firstError = 0;
    for (UInt32 k = 0; k != NumThreads; k++)
    {
      const WRes cur = Items[k].Wait_If_Created();
      if (firstError == 0 && cur != 0)
        firstError = cur;
    }
    NumThreads = 0;
    return firstError;
  }

  ~CFreqThreads()
  {
    WaitAll();
    delete [] Items;
  }
};
static THREAD_FUNC_DECL CrcThreadFunction(void *param);
// Per-thread item of hash benchmark.
// The thread function (CrcThreadFunction) calls Process() of this object.
struct CCrcInfo: public CBaseThreadInfo
{
// in: external data (or NULL) and its size; they are passed to crcib.Generate()
const Byte *Data;
size_t Size;
// in: the number of hash passes in CrcProcess()
UInt64 NumIterations;
// in: if (CheckSumDefined), (CheckSum) is the expected checksum
bool CheckSumDefined;
UInt32 CheckSum;
CMyComPtr<IHasher> Hasher;
// out: the result of Process(); it's E_FAIL until Process() sets it
HRESULT Res;
// out: the copy of crcib.CheckSum_Res after CrcProcess()
UInt32 CheckSum_Res;
#ifndef Z7_ST
// the thread sets (ReadyEvent), when the data is prepared
NSynchronization::CManualResetEvent ReadyEvent;
UInt32 ThreadIndex;
// shared start event and exit flag
CBenchSyncCommon *Common;
CAffinityMode AffinityMode;
#endif
// we want to call CCrcInfo_Base::Buffer.Free() in main thread.
// so we uses non-local CCrcInfo_Base.
CCrcInfo_Base crcib;
// Creates (ReadyEvent), if it's not created yet,
// and creates the thread that runs CrcThreadFunction(this).
HRESULT CreateThread()
{
WRes res = 0;
if (!ReadyEvent.IsCreated())
res = ReadyEvent.Create();
if (res == 0)
res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
ThreadIndex);
return HRESULT_FROM_WIN32(res);
}
#ifdef USE_ALLOCA
// the size for alloca() call at the start of the thread function
size_t AllocaSize;
#endif
void Process();
CCrcInfo(): Res(E_FAIL) {}
};
static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
// static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
// The body of hash benchmark thread:
//   1) prepares the data with crcib.Generate(),
//   2) sets (ReadyEvent),
//   3) waits for (Common->StartEvent),
//   4) runs crcib.CrcProcess(), if (Common->ExitMode) is not set.
// The result code is stored to (Res).
void CCrcInfo::Process()
{
crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
// we can use additional Generate() passes to reduce some time effects for new page allocation
// for (unsigned y = 0; y < 10; y++)
Res = crcib.Generate(Data, Size);
// if (Common)
{
// (ReadyEvent) is set also in the case of Generate() error,
// so the thread that waits for this event is released in any case
WRes wres = ReadyEvent.Set();
if (wres != 0)
{
// the error of Generate() has priority over the error of the event
if (Res == 0)
Res = HRESULT_FROM_WIN32(wres);
return;
}
if (Res != 0)
return;
wres = Common->StartEvent.Lock();
if (wres != 0)
{
Res = HRESULT_FROM_WIN32(wres);
return;
}
// exit mode: we return without running the benchmark
if (Common->ExitMode)
return;
}
Res = crcib.CrcProcess(NumIterations,
CheckSumDefined ? &CheckSum : NULL, Hasher,
Callback);
CheckSum_Res = crcib.CheckSum_Res;
/*
We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
to time of benchmark. So we don't free Buffer here
*/
// crcib.Buffer.Free();
}
// Thread function of hash benchmark.
// (param) is the pointer to CCrcInfo.
// If USE_ALLOCA is defined, it calls alloca(AllocaSize) before Process().
static THREAD_FUNC_DECL CrcThreadFunction(void *param)
{
  CCrcInfo *info = (CCrcInfo *)param;

  #ifdef USE_ALLOCA
  alloca(info->AllocaSize);
  #endif

  info->Process();
  return THREAD_FUNC_RET_ZERO;
}
// Owner of the array of CCrcInfo items and of the shared (Common) object.
// The destructor calls StartAndWait(true) (exit mode) and deletes the array.
struct CCrcThreads
{
  CCrcInfo *Items;
  unsigned NumThreads;
  CBenchSyncCommon Common;
  bool NeedClose;

  CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
  ~CCrcThreads()
  {
    StartAndWait(true);
    delete [] Items;
  }

  WRes StartAndWait(bool exitMode = false);
};
// Sets (Common.ExitMode), signals (Common.StartEvent), and then waits for
// (and closes) the created threads of the first (NumThreads) items.
// It does nothing and returns 0, if (NeedClose) is false.
// After the call (NumThreads == 0) and (NeedClose == false).
// Returns the first non-zero error code, or 0.
WRes CCrcThreads::StartAndWait(bool exitMode)
{
  if (!NeedClose)
    return 0;
  Common.ExitMode = exitMode;
  WRes firstError = Common.StartEvent.Set();
  for (unsigned k = 0; k != NumThreads; k++)
  {
    const WRes cur = Items[k].Wait_If_Created();
    if (firstError == 0 && cur != 0)
      firstError = cur;
  }
  NumThreads = 0;
  NeedClose = false;
  return firstError;
}
#endif
// Simple byte-by-byte CRC calculation via CRC_UPDATE_BYTE.
// It's used as the reference for CrcCalc() in CrcInternalTest().
static UInt32 CrcCalc1(const Byte *buf, size_t size)
{
  UInt32 v = CRC_INIT_VAL;
  size_t pos = 0;
  while (pos != size)
  {
    v = CRC_UPDATE_BYTE(v, buf[pos]);
    pos++;
  }
  return CRC_GET_DIGEST(v);
}
/*
static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
{
RandGen(buf, size, RG);
return CrcCalc1(buf, size);
}
*/
static bool CrcInternalTest()
{
CAlignedBuffer buffer;
const size_t kBufferSize0 = (1 << 8);
const size_t kBufferSize1 = (1 << 10);
const unsigned kCheckSize = (1 << 5);
buffer.Alloc(kBufferSize0 + kBufferSize1);
if (!buffer.IsAllocated())
return false;
Byte *buf = (Byte *)buffer;
size_t i;
for (i = 0; i < kBufferSize0; i++)
buf[i] = (Byte)i;
UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
if (crc1 != 0x29058C73)
return false;
RandGen(buf + kBufferSize0, kBufferSize1);
for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
for (unsigned j = 0; j < kCheckSize; j++)
if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
return false;
return true;
}
// One item of the table of methods for total benchmark (g_Bench).
struct CBenchMethod
{
// the weight of the method results in totals (EncodeWeight / DecodeWeight)
unsigned Weight;
// it's passed to MethodBench() as (generateDictBits);
// 0 : TotalBench() uses kFilterUnpackSize as data size, if the size is not forced
unsigned DictBits;
// complexity values; they are copied to CBenchProps in TotalBench()
Int32 EncComplex;
Int32 DecComplexCompr;
Int32 DecComplexUnc;
// method string; it's parsed by ParseMethodFromPROPVARIANT()
const char *Name;
// unsigned KeySize;
};
// CMPLX(x) is (x), or (x * 1000), if USE_SW_CMPLX is defined.
// #define USE_SW_CMPLX
#ifdef USE_SW_CMPLX
#define CMPLX(x) ((x) * 1000)
#else
#define CMPLX(x) (x)
#endif
// The table of methods for total benchmark.
// Fields: { Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name }
static const CBenchMethod g_Bench[] =
{
// { 40, 17, 357, 145, 20, "LZMA:x1" },
// { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" },
{ 20, 18, 360, 145, 20, "LZMA:x1" },
{ 20, 22, 600, 145, 20, "LZMA:x3" },
{ 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
{ 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },
{ 10, 16, 124, 40, 14, "Deflate:x1" },
{ 20, 16, 376, 40, 14, "Deflate:x5" },
{ 10, 16, 1082, 40, 14, "Deflate:x7" },
{ 10, 17, 422, 40, 14, "Deflate64:x5" },
{ 10, 15, 590, 69, 69, "BZip2:x1" },
{ 20, 19, 815, 122, 122, "BZip2:x5" },
{ 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
{ 10, 19, 2530, 122, 122, "BZip2:x7" },
// { 10, 18, 1010, 0, 1150, "PPMDZip:x1" },
{ 10, 18, 1010, 0, 1150, "PPMD:x1" },
// { 10, 22, 1655, 0, 1830, "PPMDZip:x5" },
{ 10, 22, 1655, 0, 1830, "PPMD:x5" },
// { 2, 0, -16, 0, -16, "Swap2" },
{ 2, 0, -16, 0, -16, "Swap4" },
// { 2, 0, 3, 0, 4, "Delta:1" },
// { 2, 0, 3, 0, 4, "Delta:2" },
// { 2, 0, 3, 0, 4, "Delta:3" },
{ 2, 0, 3, 0, 4, "Delta:4" },
// { 2, 0, 3, 0, 4, "Delta:8" },
// { 2, 0, 3, 0, 4, "Delta:32" },
{ 2, 0, 2, 0, 2, "BCJ" },
{ 2, 0, 1, 0, 1, "ARM64" },
// { 10, 0, 18, 0, 18, "AES128CBC:1" },
// { 10, 0, 21, 0, 21, "AES192CBC:1" },
{ 10, 0, 24, 0, 24, "AES256CBC:1" },
// { 10, 0, 18, 0, 18, "AES128CTR:1" },
// { 10, 0, 21, 0, 21, "AES192CTR:1" },
// { 10, 0, 24, 0, 24, "AES256CTR:1" },
// { 2, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
// { 2, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
{ 2, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },
// { 2, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
// { 2, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
// { 2, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },
// { 1, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" },
// { 1, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" },
{ 1, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" }
// { 1, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" },
// { 1, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" },
// { 1, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" },
};
// One item of the table of hash methods for benchmark (g_Hash).
struct CBenchHash
{
// the weight of the method results in totals
unsigned Weight;
// complexity value of the method
// NOTE(review): the unit presumably depends on k_Hash_Complex_Mult - confirm at the use site
UInt32 Complex;
// the expected checksum of the benchmark data for this hash
UInt32 CheckSum;
// method string
const char *Name;
};
// ARM_CRC_MUL : the multiplier for complexity of "CRC32:32" and "CRC32:64" items
// #define ARM_CRC_MUL 100
#define ARM_CRC_MUL 1
#define k_Hash_Complex_Mult 256
// The table of hash methods for benchmark.
// Fields: { Weight, Complex, CheckSum, Name }
// The items of the same hash function use the same CheckSum value.
static const CBenchHash g_Hash[] =
{
// { 1, 1820, 0x21e207bb, "CRC32:1" },
// { 10, 558, 0x21e207bb, "CRC32:4" },
{ 20, 339, 0x21e207bb, "CRC32:8" } ,
{ 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
{ 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
{ 10, 512, 0x41b901d1, "CRC64" },
{ 10, 5100, 0x7913ba03, "SHA256:1" },
{ 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
{ 10, 2340, 0xff769021, "SHA1:1" },
{ 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
{ 2, 5500, 0x85189d02, "BLAKE2sp" }
};
// Prints the space character and then (value) as decimal number,
// right-aligned in the field of (size) characters.
// So the total width is (size + 1), if the number fits in the field.
// The padding is limited by the size of the local buffer.
static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
{
  char text[128];
  // the digits are written at fixed position; all bytes before it are spaces
  const unsigned kDigitsPos = (unsigned)sizeof(text) - 32;
  memset(text, ' ', kDigitsPos);
  ConvertUInt64ToString(value, text + kDigitsPos);

  // one space before the number is always printed (the old "withSpace" mode)
  unsigned first = kDigitsPos - 1;
  const unsigned width = size + 1;

  const unsigned len = (unsigned)strlen(text + first);
  if (width > len)
  {
    const unsigned pad = width - len;
    first = (pad > first) ? 0 : first - pad;
  }
  f.Print(text + first);
}
// The widths (in characters) of the fields of benchmark results table.
// They are passed as (size) to PrintNumber() / PrintSpaces() / PrintLeft() in this file.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;
static const unsigned kFieldSize_CrcSpeed = 8;
// the sums of the widths of several fields and of additional characters
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
// Prints (rating / 1000000) rounded to the nearest integer.
static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
{
  const UInt64 kDivider = 1000000;
  const UInt64 rounded = (rating + kDivider / 2) / kDivider;
  PrintNumber(f, rounded, size);
}
// Prints (val * 100 / divider) rounded to the nearest integer.
// It prints 0, if (divider == 0).
static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
{
  const UInt64 percents = (divider == 0) ? 0 :
      (val * 100 + divider / 2) / divider;
  PrintNumber(f, percents, size);
}
// Prints the character (c) repeated (size) times.
// The output is generated in chunks that fit in the local buffer, so any
// value of (size) is supported. The previous code wrote (size + 1) bytes to
// the buffer of 256 bytes without a check, so (size >= 256) overflowed it.
// For (size < 256) there is exactly one f.Print() call, as before
// (also for (size == 0), where the empty string is printed).
static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
{
  char s[256];
  // one byte of the buffer is reserved for the terminating zero
  const unsigned kChunkMax = (unsigned)sizeof(s) - 1;
  do
  {
    const unsigned cur = (size < kChunkMax) ? size : kChunkMax;
    memset(s, (Byte)c, cur);
    s[cur] = 0;
    f.Print(s);
    size -= cur;
  }
  while (size != 0);
}
// Prints (size) space characters.
static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
{
  const char kSpace = ' ';
  PrintChars(f, kSpace, size);
}
// Prints (usage) converted to percents by Benchmark_GetUsage_Percents().
static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
{
  const UInt64 percents = Benchmark_GetUsage_Percents(usage);
  PrintNumber(f, percents, size);
}
// Prints the fields: Usage, R/U (rpu), Rating.
// If (showFreq), it also prints two fields of percents relative to CPU
// frequency: (rating) relative to the frequency scaled by (usage), and
// (rating) relative to (cpuFreq). If (cpuFreq == 0), spaces are printed
// instead of these two fields.
static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
{
  PrintUsage(f, usage, kFieldSize_Usage);
  PrintRating(f, rpu, kFieldSize_RU);
  PrintRating(f, rating, kFieldSize_Rating);
  if (!showFreq)
    return;
  if (cpuFreq == 0)
  {
    PrintSpaces(f, kFieldSize_EUAndEffec);
    return;
  }
  const UInt64 usedFreq = cpuFreq * usage / kBenchmarkUsageMult;
  PrintPercents(f, rating, usedFreq, kFieldSize_EU);
  PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
}
void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
{
Speed = info.GetUnpackSizeSpeed();
Usage = info.GetUsage();
RPU = info.GetRatingPerUsage(Rating);
}
// Multiplies all accumulated values (including NumIterations2) by (weight).
void CTotalBenchRes::Mult_For_Weight(unsigned weight)
{
  Speed *= weight;
  Usage *= weight;
  Rating *= weight;
  RPU *= weight;
  NumIterations2 *= weight;
}
// Adds the values of (r) to the values of this object.
void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
{
  // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
  NumIterations2 += r.NumIterations2;
  Speed += r.Speed;
  RPU += r.RPU;
  Usage += r.Usage;
  Rating += r.Rating;
}
// Builds the results for (info) and (rating).
// If (f) is not NULL, it prints the speed in KiB/s (or spaces, if the speed
// is zero) and then Usage / RPU / Rating fields.
// If (res) is not NULL, the results multiplied by (weight) are added to (*res).
static void PrintResults(IBenchPrintCallback *f,
    const CBenchInfo &info,
    unsigned weight,
    UInt64 rating,
    bool showFreq, UInt64 cpuFreq,
    CTotalBenchRes *res)
{
  CTotalBenchRes cur;
  cur.Rating = rating;
  cur.NumIterations2 = 1;
  cur.Generate_From_BenchInfo(info);

  if (f)
  {
    if (cur.Speed == 0)
      PrintSpaces(*f, 1 + kFieldSize_Speed);
    else
      PrintNumber(*f, cur.Speed / 1024, kFieldSize_Speed);
    PrintResults(*f, cur.Usage, cur.RPU, rating, showFreq, cpuFreq);
  }

  if (!res)
    return;
  // res->NumIterations1++;
  cur.Mult_For_Weight(weight);
  res->Update_With_Res(cur);
}
// Prints the averaged totals: each value of (res) is divided by
// (res.NumIterations2), where 0 is processed as 1.
// The speed is printed only if (showSpeed) and if the speed is not zero;
// otherwise spaces are printed in the speed field.
static void PrintTotals(IBenchPrintCallback &f,
    bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
{
  UInt64 divisor = res.NumIterations2;
  if (divisor == 0)
    divisor = 1;

  const UInt64 avgSpeed = res.Speed / divisor;
  const bool printSpeed = showSpeed && avgSpeed != 0;
  if (!printSpeed)
    PrintSpaces(f, 1 + kFieldSize_Speed);
  else
    PrintNumber(f, avgSpeed / 1024, kFieldSize_Speed);

  // PrintSpaces(f, 1 + kFieldSize_Speed);
  // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
  PrintResults(f, res.Usage / divisor, res.RPU / divisor, res.Rating / divisor, showFreq, cpuFreq);
}
// Appends the hexadecimal representation of (v) to the string (s).
static void PrintHex(AString &s, UInt64 v)
{
  char hexText[32];
  ConvertUInt64ToHex(v, hexText);
  s += hexText;
}
// Returns the string with information about thread affinity of the process.
// The string is empty, if the number of process threads is equal to
// the number of system threads. Otherwise the string is:
//   " / " + numSysThreads + " : " + affinity information
// where affinity information is:
//   _WIN32 : the process affinity mask and the system affinity mask in hex,
//   other  : one hex digit for each group of 4 CPUs (the highest group first),
//            where each bit is the result of ti.IsCpuSet() for that CPU.
AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
{
AString s;
// s.Add_UInt32(ti.numProcessThreads);
unsigned numSysThreads = ti.GetNumSystemThreads();
if (ti.GetNumProcessThreads() != numSysThreads)
{
// if (ti.numProcessThreads != ti.numSysThreads)
{
s += " / ";
s.Add_UInt32(numSysThreads);
}
s += " : ";
#ifdef _WIN32
PrintHex(s, ti.processAffinityMask);
s += " / ";
PrintHex(s, ti.systemAffinityMask);
#else
// (i) : the number of CPUs rounded up to a multiple of 4; at least 4
unsigned i = (numSysThreads + 3) & ~(unsigned)3;
if (i == 0)
i = 4;
for (; i >= 4; )
{
i -= 4;
// (val) : 4-bit value for CPUs (i ... i + 3); bit (k) is for CPU (i + k)
unsigned val = 0;
for (unsigned k = 0; k < 4; k++)
{
const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
val += (bit << k);
}
PrintHex(s, val);
}
#endif
}
return s;
}
#ifdef Z7_LARGE_PAGES
#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
extern SIZE_T g_LargePageSize;
}
#endif
// Adds the information about large pages to the string (s).
// _WIN32 : if large pages mode is enabled or if the large page size is known,
//   it adds "(LP-" + size, then "-1G", if CPU_IsSupported_PageGB() (x86 / x64 only),
//   then "-NA", if large pages mode is not enabled, and then ")".
// other systems : nothing is added.
void Add_LargePages_String(AString &s)
{
#ifdef _WIN32
if (g_LargePagesMode || g_LargePageSize != 0)
{
s.Add_OptSpaced("(LP-");
PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_IsSupported_PageGB())
s += "-1G";
#endif
if (!g_LargePagesMode)
s += "-NA";
s += ")";
}
#else
s += "";
#endif
}
#endif
// Prints the line part about requirements:
//   "RAM " + sizeString + size in MB (or " ?", if the size is not defined) + " MB"
//   + large pages information (if Z7_LARGE_PAGES is defined)
//   + ", # " + threadsString + numThreads
static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
    bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
{
  f.Print("RAM ");
  f.Print(sizeString);
  if (!size_Defined)
    f.Print(" ?");
  else
    PrintNumber(f, (size >> 20), 6);
  f.Print(" MB");

  #ifdef Z7_LARGE_PAGES
  {
    AString largePages;
    Add_LargePages_String(largePages);
    f.Print(largePages);
  }
  #endif

  f.Print(", # ");
  f.Print(threadsString);
  PrintNumber(f, numThreads, 3);
}
// The implementation of IBenchCallback that prints the results to (_file)
// and accumulates the totals in (EncodeRes) and (DecodeRes).
// (DictSize) and (_file) are not initialized by the constructor:
// they must be set before the calls of the callback functions.
struct CBenchCallbackToPrint Z7_final: public IBenchCallback
{
// false : final results are stored to (BenchInfo_Results), but they are not printed
bool NeedPrint;
// true : encode and decode results are printed in one line, separated by (kSep)
bool Use2Columns;
bool ShowFreq;
// the number of spaces printed before decode results in one-column mode
unsigned NameFieldSize;
// the weights for adding the results to (EncodeRes) / (DecodeRes)
unsigned EncodeWeight;
unsigned DecodeWeight;
UInt64 CpuFreq;
// it's used for the calculation of the encode rating
UInt64 DictSize;
IBenchPrintCallback *_file;
CBenchProps BenchProps;
CTotalBenchRes EncodeRes;
CTotalBenchRes DecodeRes;
// the final results: [0] : encode, [1] : decode
CBenchInfo BenchInfo_Results[2];
CBenchCallbackToPrint():
NeedPrint(true),
Use2Columns(false),
ShowFreq(false),
NameFieldSize(0),
EncodeWeight(1),
DecodeWeight(1),
CpuFreq(0)
{}
void Init() { EncodeRes.Init(); DecodeRes.Init(); }
void Print(const char *s);
void NewLine();
HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
};
// Stores the frequency settings that are used for printing of results.
// It always returns S_OK.
HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
{
  this->CpuFreq = cpuFreq;
  this->ShowFreq = showFreq;
  return S_OK;
}
// Callback for encode results.
// It checks the break request at first. Non-final results are ignored.
// Final results are stored to BenchInfo_Results[0], and if (NeedPrint),
// the results are printed and added to (EncodeRes) with (EncodeWeight).
// In one-column mode the new line is printed after the results.
HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
{
  RINOK(_file->CheckBreak())
  if (!final)
    return S_OK;
  BenchInfo_Results[0] = info;
  if (!NeedPrint)
    return S_OK;

  const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
  PrintResults(_file, info, EncodeWeight, rating, ShowFreq, CpuFreq, &EncodeRes);
  if (!Use2Columns)
    _file->NewLine();
  return S_OK;
}
static const char * const kSep = " | ";

// Callback for decode results.
// It checks the break request at first. Non-final results are ignored.
// Final results are stored to BenchInfo_Results[1], and if (NeedPrint),
// the results are printed after (kSep) in two-columns mode, or after
// (NameFieldSize) spaces in one-column mode, and they are added to
// (DecodeRes) with (DecodeWeight).
// For printing, the sizes are multiplied by the number of iterations.
HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
{
  RINOK(_file->CheckBreak())
  if (!final)
    return S_OK;
  BenchInfo_Results[1] = info;
  if (!NeedPrint)
    return S_OK;

  const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
  if (!Use2Columns)
    PrintSpaces(*_file, NameFieldSize);
  else
    _file->Print(kSep);

  CBenchInfo total = info;
  total.UnpackSize *= total.NumIterations;
  total.PackSize *= total.NumIterations;
  total.NumIterations = 1;
  PrintResults(_file, total, DecodeWeight, rating, ShowFreq, CpuFreq, &DecodeRes);
  return S_OK;
}
void CBenchCallbackToPrint::Print(const char *s)
{
_file->Print(s);
}
void CBenchCallbackToPrint::NewLine()
{
_file->NewLine();
}
// Prints the string (s) and then the spaces up to the field width (size).
// No spaces are printed, if the string is not shorter than (size).
static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
{
  f.Print(s);
  const int pad = (int)size - (int)MyStringLen(s);
  if (pad <= 0)
    return;
  PrintSpaces(f, (unsigned)pad);
}
// Prints the spaces up to the field width (size) and then the string (s).
// No spaces are printed, if the string is not shorter than (size).
static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
{
  const int pad = (int)size - (int)MyStringLen(s);
  const bool needPad = (pad > 0);
  if (needPad)
    PrintSpaces(f, (unsigned)pad);
  f.Print(s);
}
static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
{
UString wildc = GetUnicodeString(mask);
UString bname = GetUnicodeString(name);
wildc.MakeLower_Ascii();
bname.MakeLower_Ascii();
return DoesWildcardMatchName(wildc, bname);
}
// Runs MethodBench() for each method of (g_Bench) whose name matches
// the wildcard (methodMask.MethodName) (case-insensitive for ASCII).
// For each method it prints the name, sets the properties of (callback)
// (key size, complexity values, weights), and then runs the benchmark.
// E_NOTIMPL from MethodBench() is not an error here: such method is skipped.
// Any other error stops the function and it is returned to the caller.
static HRESULT TotalBench(
DECL_EXTERNAL_CODECS_LOC_VARS
const COneMethodInfo &methodMask,
UInt64 complexInCommands,
#ifndef Z7_ST
UInt32 numThreads,
const CAffinityMode *affinityMode,
#endif
bool forceUnpackSize,
size_t unpackSize,
const Byte *fileData,
IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
{
for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
{
const CBenchMethod &bench = g_Bench[i];
if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
continue;
PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
{
// key size in bytes: 16 for "AES128*", 24 for "AES192*", and 32 for other names
unsigned keySize = 32;
if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
callback->BenchProps.KeySize = keySize;
}
callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
callback->BenchProps.EncComplex = bench.EncComplex;
COneMethodInfo method;
NCOM::CPropVariant propVariant;
propVariant = bench.Name;
RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
// the methods with (DictBits == 0) use kFilterUnpackSize,
// if the size was not forced by the caller
size_t unpackSize2 = unpackSize;
if (!forceUnpackSize && bench.DictBits == 0)
unpackSize2 = kFilterUnpackSize;
callback->EncodeWeight = bench.Weight;
callback->DecodeWeight = bench.Weight;
// (oldLzmaBenchMode) is false for total benchmark
const HRESULT res = MethodBench(
EXTERNAL_CODECS_LOC_VARS
complexInCommands,
#ifndef Z7_ST
false, numThreads, affinityMode,
#endif
method,
unpackSize2, fileData,
bench.DictBits,
printCallback, callback, &callback->BenchProps);
if (res == E_NOTIMPL)
{
// callback->Print(" ---");
// we need additional empty line as line for decompression results
if (!callback->Use2Columns)
callback->NewLine();
}
else
{
RINOK(res)
}
callback->NewLine();
}
return S_OK;
}
struct CFreqBench
{
// in:
UInt64 complexInCommands;
UInt32 numThreads;
bool showFreq;
UInt64 specifiedFreq;
// out:
UInt64 CpuFreqRes;
UInt64 UsageRes;
UInt32 res;
CFreqBench()
{}
HRESULT FreqBench(IBenchPrintCallback *_file
#ifndef Z7_ST
, const CAffinityMode *affinityMode
#endif
);
};
/*
FreqBench() measures the speed of CountCpuFreq() calculation.
in:
_file : optional callback for results printing and break checking. It can be NULL.
affinityMode : it's used for thread creation in multithreading mode (non-Z7_ST builds only).
out:
CpuFreqRes : (speed rating) / numThreads
UsageRes : the result of info.GetUsage()
returns S_OK, or the error code from thread creation / waiting / callback.
*/
HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
#ifndef Z7_ST
, const CAffinityMode *affinityMode
#endif
)
{
// reset the (out) fields
res = 0;
CpuFreqRes = 0;
UsageRes = 0;
if (numThreads == 0)
numThreads = 1;
#ifdef Z7_ST
numThreads = 1;
#endif
// the work is split to (numIterations) external iterations of (numIterations2) internal iterations.
// (numIterations2) is limited by (1 << 30).
const UInt32 complexity = kNumFreqCommands;
UInt64 numIterations = complexInCommands / complexity;
UInt32 numIterations2 = 1 << 30;
if (numIterations > numIterations2)
numIterations /= numIterations2;
else
{
numIterations2 = (UInt32)numIterations;
numIterations = 1;
}
CBenchInfoCalc progressInfoSpec;
#ifndef Z7_ST
// separate threads are used, if more than one thread is requested or if the affinity is required
bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();
if (mtMode)
{
CFreqThreads threads;
// NOTE(review): (Items) is presumably released by CFreqThreads destructor - it's not visible here
threads.Items = new CFreqInfo[numThreads];
UInt32 i;
// all thread items are prepared before the time measuring is started
for (i = 0; i < numThreads; i++)
{
CFreqInfo &info = threads.Items[i];
info.Callback = _file;
info.CallbackRes = S_OK;
info.NumIterations = numIterations;
info.Size = numIterations2;
}
progressInfoSpec.SetStartTime();
for (i = 0; i < numThreads; i++)
{
// Sleep(10);
CFreqInfo &info = threads.Items[i];
WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
// the thread is counted before the error check, so (NumThreads) is the number of created threads
if (info.Thread.IsCreated())
threads.NumThreads++;
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
}
WRes wres = threads.WaitAll();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
// the first callback error of threads is returned
for (i = 0; i < numThreads; i++)
{
RINOK(threads.Items[i].CallbackRes)
}
}
else
#endif
{
// single-thread mode: the calculation is executed in the current thread
progressInfoSpec.SetStartTime();
UInt32 sum = g_BenchCpuFreqTemp;
for (UInt64 k = numIterations; k > 0; k--)
{
sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
// break request is checked after each external iteration
if (_file)
{
RINOK(_file->CheckBreak())
}
}
res += sum;
}
// NOTE(review): this check uses (res), presumably to keep the calculation from being optimized out - confirm
if (res == 0x12345678)
if (_file)
{
RINOK(_file->CheckBreak())
}
CBenchInfo info;
progressInfoSpec.SetFinishTime(info);
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = 1;
// the total number of executed commands in all threads
const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
const UInt64 rating = info.GetSpeed(numCommands);
CpuFreqRes = rating / numThreads;
UsageRes = info.GetUsage();
if (_file)
{
// (specifiedFreq), if it's defined, is shown instead of measured frequency
PrintResults(_file, info,
0, // weight
rating,
showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
RINOK(_file->CheckBreak())
}
return S_OK;
}
/*
CrcBench() measures the speed of the hash method (method.MethodName).
in:
complexInCommands, complexity : they define the number of iterations
numThreads : 0 is treated as 1
bufferSize, fileData : size of data block and optional data pointer (fileData can be NULL)
checkSum : optional expected check sum value. It can be NULL.
_file : optional callback for results printing and break checking. It can be NULL.
showRating, benchWeight, encodeRes, showFreq, cpuFreq : they are used only for PrintResults() call
out:
speed : the result of info.GetSpeed() for total size of data processed in all threads
usage : the result of info.GetUsage()
returns:
E_NOTIMPL : hash method was not found or hasher was not created
S_FALSE   : the check sums of two threads are different
*/
static HRESULT CrcBench(
DECL_EXTERNAL_CODECS_LOC_VARS
UInt64 complexInCommands,
UInt32 numThreads,
const size_t bufferSize,
const Byte *fileData,
UInt64 &speed,
UInt64 &usage,
UInt32 complexity, unsigned benchWeight,
const UInt32 *checkSum,
const COneMethodInfo &method,
IBenchPrintCallback *_file,
#ifndef Z7_ST
const CAffinityMode *affinityMode,
#endif
bool showRating,
CTotalBenchRes *encodeRes,
bool showFreq, UInt64 cpuFreq)
{
if (numThreads == 0)
numThreads = 1;
#ifdef Z7_ST
numThreads = 1;
#endif
const AString &methodName = method.MethodName;
// methodName.RemoveChar(L'-');
CMethodId hashID;
if (!FindHashMethod(
EXTERNAL_CODECS_LOC_VARS
methodName, hashID))
return E_NOTIMPL;
/*
// if will generate random data in each thread, instead of global data
CMidAlignedBuffer buffer;
if (!fileData)
{
ALLOC_WITH_HRESULT(&buffer, bufferSize)
RandGen(buffer, bufferSize);
fileData = buffer;
}
*/
// (bsize) is used instead of (bufferSize) to avoid division by zero
const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
if (numIterations == 0)
numIterations = 1;
CBenchInfoCalc progressInfoSpec;
CBenchInfo info;
#ifndef Z7_ST
// separate threads are used, if more than one thread is requested or if the affinity is required
bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
if (mtEncMode)
{
CCrcThreads threads;
// NOTE(review): (Items) is presumably released by CCrcThreads destructor - it's not visible here
threads.Items = new CCrcInfo[numThreads];
{
WRes wres = threads.Common.StartEvent.Create();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
threads.NeedClose = true;
}
UInt32 i;
// step 1: the hasher and the parameters are prepared for each thread
for (i = 0; i < numThreads; i++)
{
CCrcInfo &ci = threads.Items[i];
AString name;
RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
if (!ci.Hasher)
return E_NOTIMPL;
// the method properties are set, if the hasher supports ICompressSetCoderProperties
CMyComPtr<ICompressSetCoderProperties> scp;
ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
if (scp)
{
RINOK(method.SetCoderProps(scp))
}
ci.Callback = _file;
ci.Data = fileData;
ci.NumIterations = numIterations;
ci.Size = bufferSize;
ci.CheckSumDefined = false;
if (checkSum)
{
ci.CheckSum = *checkSum;
ci.CheckSumDefined = true;
}
#ifdef USE_ALLOCA
// the value (0 ... 0x7FF) depends on thread index.
// NOTE(review): presumably it's used to shift the stack of each thread - confirm in CCrcInfo
ci.AllocaSize = (i * 16 * 21) & 0x7FF;
#endif
}
// step 2: the threads are created
for (i = 0; i < numThreads; i++)
{
CCrcInfo &ci = threads.Items[i];
ci.ThreadIndex = i;
ci.Common = &threads.Common;
ci.AffinityMode = *affinityMode;
HRESULT hres = ci.CreateThread();
// the thread is counted before the error check, so (NumThreads) is the number of created threads
if (ci.Thread.IsCreated())
threads.NumThreads++;
if (hres != 0)
return hres;
}
// step 3: we wait ReadyEvent of each thread and check the result of thread preparation
for (i = 0; i < numThreads; i++)
{
CCrcInfo &ci = threads.Items[i];
WRes wres = ci.ReadyEvent.Lock();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
RINOK(ci.Res)
}
// step 4: the time is measured only for StartAndWait() call
progressInfoSpec.SetStartTime();
WRes wres = threads.StartAndWait();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
progressInfoSpec.SetFinishTime(info);
// step 5: the thread results are checked. The check sums of all threads must be identical
for (i = 0; i < numThreads; i++)
{
RINOK(threads.Items[i].Res)
if (i != 0)
if (threads.Items[i].CheckSum_Res !=
threads.Items[i - 1].CheckSum_Res)
return S_FALSE;
}
}
else
#endif
{
// single-thread mode: the hash calculation is executed in the current thread
CMyComPtr<IHasher> hasher;
AString name;
RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
if (!hasher)
return E_NOTIMPL;
CMyComPtr<ICompressSetCoderProperties> scp;
hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
if (scp)
{
RINOK(method.SetCoderProps(scp))
}
CCrcInfo_Base crcib;
crcib.CreateLocalBuf = false;
// NOTE(review): Generate() presumably prepares the data for hashing - see CCrcInfo_Base
RINOK(crcib.Generate(fileData, bufferSize))
// the data preparation is not included to measured time
progressInfoSpec.SetStartTime();
RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
progressInfoSpec.SetFinishTime(info);
}
// the total size of data processed by one thread and by all threads
UInt64 unpSize = numIterations * bufferSize;
UInt64 unpSizeThreads = unpSize * numThreads;
info.UnpackSize = unpSizeThreads;
info.PackSize = unpSizeThreads;
info.NumIterations = 1;
if (_file)
{
if (showRating)
{
// if the data size is zero, we use the size of 1 byte per iteration for rating
UInt64 unpSizeThreads2 = unpSizeThreads;
if (unpSizeThreads2 == 0)
unpSizeThreads2 = numIterations * 1 * numThreads;
const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
const UInt64 rating = info.GetSpeed(numCommands);
PrintResults(_file, info,
benchWeight, rating,
showFreq, cpuFreq, encodeRes);
}
RINOK(_file->CheckBreak())
}
speed = info.GetSpeed(unpSizeThreads);
usage = info.GetUsage();
return S_OK;
}
/*
  TotalBench_Hash() calls CrcBench() for each hash method of g_Hash, whose name
  matches the wildcard in methodMask.MethodName (ASCII case is ignored).
  Each matched method is printed in separate line with the rating (showRating mode).

  The predefined check sum of method is verified only if the default generated
  data is used: (fileData == NULL) and (bufSize == (1 << kNumHashDictBits)).

  E_NOTIMPL from CrcBench() is not treated as error.
  Any other error code is returned to the caller.
*/
static HRESULT TotalBench_Hash(
    DECL_EXTERNAL_CODECS_LOC_VARS
    const COneMethodInfo &methodMask,
    UInt64 complexInCommands,
    UInt32 numThreads,
    size_t bufSize,
    const Byte *fileData,
    IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
    #ifndef Z7_ST
    const CAffinityMode *affinityMode,
    #endif
    CTotalBenchRes *encodeRes,
    bool showFreq, UInt64 cpuFreq)
{
  for (unsigned index = 0; index < Z7_ARRAY_SIZE(g_Hash); index++)
  {
    const CBenchHash &item = g_Hash[index];
    if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, item.Name))
      continue;
    PrintLeft(*callback->_file, item.Name, kFieldSize_Name);
    // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
    // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
    // callback->BenchProps.EncComplex = bench.EncComplex;

    COneMethodInfo method;
    {
      NCOM::CPropVariant nameProp;
      nameProp = item.Name;
      RINOK(method.ParseMethodFromPROPVARIANT(UString(), nameProp))
    }

    const UInt32 *expectedCheckSum = NULL;
    if (!fileData && bufSize == (1 << kNumHashDictBits))
      expectedCheckSum = &item.CheckSum;

    UInt64 speed, usage;
    const HRESULT hres = CrcBench(
        EXTERNAL_CODECS_LOC_VARS
        complexInCommands,
        numThreads, bufSize, fileData,
        speed, usage,
        item.Complex, item.Weight,
        expectedCheckSum,
        method,
        printCallback,
        #ifndef Z7_ST
        affinityMode,
        #endif
        true, // showRating
        encodeRes, showFreq, cpuFreq);

    if (hres != E_NOTIMPL)
    {
      RINOK(hres)
    }
    // else callback->Print(" ---");
    callback->NewLine();
  }
  return S_OK;
}
/*
  CTempValues: the owner of heap array of UInt64 values.
  The array is released in destructor.
  Copying is disabled, because two objects with the same (Values) pointer
  would delete the same array twice.
*/
struct CTempValues
{
  UInt64 *Values;

  CTempValues(): Values(NULL) {}
  ~CTempValues() { delete []Values; }

  // Allocates the array of (num) items. The items are not initialized.
  // The previous array (if any) is released, so repeated calls do not leak.
  void Alloc(UInt32 num)
  {
    delete []Values;
    Values = NULL; // the destructor stays safe, if (new) throws exception
    Values = new UInt64[num];
  }

private:
  // declared without definition: copy constructor and assignment are not allowed
  CTempValues(const CTempValues &);
  CTempValues &operator=(const CTempValues &);
};
static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
{
const wchar_t *end;
UInt64 result = ConvertStringToUInt64(s, &end);
if (*end != 0 || s.IsEmpty())
prop = s;
else if (result <= (UInt32)0xFFFFFFFF)
prop = (UInt32)result;
else
prop = result;
}
// Returns true, if two method names are equal (the case of ASCII letters is ignored).
static bool AreSameMethodNames(const char *fullName, const char *shortName)
{
  const bool areEqual = StringsAreEqualNoCase_Ascii(fullName, shortName);
  return areEqual;
}
// Prints the (usage) value and the number of benchmark threads with PrintRequirements().
static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
{
  const bool usage_Defined = true; // (usage) value is always known here
  PrintRequirements(f,
      "usage:", usage_Defined, usage,
      "Benchmark threads: ", threads);
}
// Prints the delimiter between the column groups of table.
static void Print_Delimiter(IBenchPrintCallback &f)
{
  const char * const delimiter = " |";
  f.Print(delimiter);
}
// Prints the decimal number (pow) with ':' after it.
// The string is left-aligned in the field of kFieldSize_SmallName characters.
static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
{
  char text[16];
  ConvertUInt32ToString(pow, text);
  const unsigned len = MyStringLen(text);
  text[len] = ':';
  text[len + 1] = 0;
  PrintLeft(f, text, kFieldSize_SmallName); // 4
}
// Prints two fields of table: the (usage) value and the (speed) value divided by 1000000.
static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
    UInt64 usage, UInt64 speed)
{
  PrintUsage(f, usage, kFieldSize_Usage);
  const UInt64 speedInMillions = speed / 1000000;
  PrintNumber(f, speedInMillions, kFieldSize_CrcSpeed);
}
HRESULT Bench(
DECL_EXTERNAL_CODECS_LOC_VARS
IBenchPrintCallback *printCallback,
IBenchCallback *benchCallback,
const CObjectVector<CProperty> &props,
UInt32 numIterations,
bool multiDict,
IBenchFreqCallback *freqCallback)
{
if (!CrcInternalTest())
return E_FAIL;
UInt32 numCPUs = 1;
UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
NSystem::CProcessAffinity threadsInfo;
threadsInfo.InitST();
#ifndef Z7_ST
if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
numCPUs = threadsInfo.GetNumProcessThreads();
else
numCPUs = NSystem::GetNumberOfProcessors();
#endif
// numCPUs = 24;
/*
{
DWORD_PTR mask = (1 << 0);
DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
old = old;
DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
old2 = old2;
return 0;
}
*/
bool ramSize_Defined = NSystem::GetRamSize(ramSize);
UInt32 numThreadsSpecified = numCPUs;
bool needSetComplexity = false;
UInt32 testTimeMs = kComplexInMs;
UInt32 startDicLog = 22;
bool startDicLog_Defined = false;
UInt64 specifiedFreq = 0;
bool multiThreadTests = false;
UInt64 complexInCommands = kComplexInCommands;
UInt32 numThreads_Start = 1;
#ifndef Z7_ST
CAffinityMode affinityMode;
#endif
COneMethodInfo method;
CMidAlignedBuffer fileDataBuffer;
bool use_fileData = false;
bool isFixedDict = false;
{
unsigned i;
if (printCallback)
{
for (i = 0; i < props.Size(); i++)
{
const CProperty &property = props[i];
printCallback->Print(" ");
printCallback->Print(GetAnsiString(property.Name));
if (!property.Value.IsEmpty())
{
printCallback->Print("=");
printCallback->Print(GetAnsiString(property.Value));
}
}
if (!props.IsEmpty())
printCallback->NewLine();
}
for (i = 0; i < props.Size(); i++)
{
const CProperty &property = props[i];
UString name (property.Name);
name.MakeLower_Ascii();
if (name.IsEqualTo("file"))
{
if (property.Value.IsEmpty())
return E_INVALIDARG;
NFile::NIO::CInFile file;
if (!file.Open(us2fs(property.Value)))
return GetLastError_noZero_HRESULT();
size_t len;
{
UInt64 len64;
if (!file.GetLength(len64))
return GetLastError_noZero_HRESULT();
if (printCallback)
{
printCallback->Print("file size =");
PrintNumber(*printCallback, len64, 0);
printCallback->NewLine();
}
len = (size_t)len64;
if (len != len64)
return E_INVALIDARG;
}
// (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
ALLOC_WITH_HRESULT(&fileDataBuffer, len)
use_fileData = true;
{
size_t processed;
if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
return GetLastError_noZero_HRESULT();
if (processed != len)
return E_FAIL;
}
continue;
}
NCOM::CPropVariant propVariant;
if (!property.Value.IsEmpty())
ParseNumberString(property.Value, propVariant);
if (name.IsEqualTo("time"))
{
RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
needSetComplexity = true;
testTimeMs *= 1000;
continue;
}
if (name.IsEqualTo("timems"))
{
RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
needSetComplexity = true;
continue;
}
if (name.IsEqualTo("tic"))
{
UInt32 v;
RINOK(ParsePropToUInt32(UString(), propVariant, v))
if (v >= 64)
return E_INVALIDARG;
complexInCommands = (UInt64)1 << v;
continue;
}
const bool isCurrent_fixedDict = name.IsEqualTo("df");
if (isCurrent_fixedDict)
isFixedDict = true;
if (isCurrent_fixedDict || name.IsEqualTo("ds"))
{
RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
if (startDicLog > 32)
return E_INVALIDARG;
startDicLog_Defined = true;
continue;
}
if (name.IsEqualTo("mts"))
{
RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
continue;
}
if (name.IsEqualTo("af"))
{
UInt32 bundle;
RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
if (bundle > 0 && bundle < numCPUs)
{
#ifndef Z7_ST
affinityMode.SetLevels(numCPUs, 2);
affinityMode.NumBundleThreads = bundle;
#endif
}
continue;
}
if (name.IsEqualTo("freq"))
{
UInt32 freq32 = 0;
RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
if (freq32 == 0)
return E_INVALIDARG;
specifiedFreq = (UInt64)freq32 * 1000000;
if (printCallback)
{
printCallback->Print("freq=");
PrintNumber(*printCallback, freq32, 0);
printCallback->NewLine();
}
continue;
}
if (name.IsPrefixedBy_Ascii_NoCase("mt"))
{
const UString s = name.Ptr(2);
if (s.IsEqualTo("*")
|| (s.IsEmpty()
&& propVariant.vt == VT_BSTR
&& StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
{
multiThreadTests = true;
continue;
}
#ifndef Z7_ST
RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
#endif
continue;
}
RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
}
}
if (printCallback)
{
AString s;
#ifndef _WIN32
s += "Compiler: ";
GetCompiler(s);
printCallback->Print(s);
printCallback->NewLine();
s.Empty();
#endif
GetSystemInfoText(s);
printCallback->Print(s);
printCallback->NewLine();
}
if (printCallback)
{
printCallback->Print("1T CPU Freq (MHz):");
}
if (printCallback || freqCallback)
{
UInt64 numMilCommands = 1 << 6;
if (specifiedFreq != 0)
{
while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
numMilCommands >>= 1;
}
for (int jj = 0;; jj++)
{
if (printCallback)
RINOK(printCallback->CheckBreak())
UInt64 start = ::GetTimeCount();
UInt32 sum = (UInt32)start;
sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
if (sum == 0xF1541213)
if (printCallback)
printCallback->Print("");
const UInt64 realDelta = ::GetTimeCount() - start;
start = realDelta;
if (start == 0)
start = 1;
if (start > (UInt64)1 << 61)
start = 1;
const UInt64 freq = GetFreq();
// mips is constant in some compilers
const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start);
const UInt64 mipsVal = numMilCommands * freq / start;
if (printCallback)
{
if (realDelta == 0)
{
printCallback->Print(" -");
}
else
{
// PrintNumber(*printCallback, start, 0);
PrintNumber(*printCallback, mipsVal, 5);
}
}
if (freqCallback)
{
RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult))
}
if (jj >= 1)
{
bool needStop = (numMilCommands >= (1 <<
#ifdef _DEBUG
7
#else
11
#endif
));
if (start >= freq * 16)
{
printCallback->Print(" (Cmplx)");
if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
{
needSetComplexity = true;
}
needStop = true;
}
if (needSetComplexity)
SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
if (needStop)
break;
numMilCommands <<= 1;
}
}
if (freqCallback)
{
RINOK(freqCallback->FreqsFinished(1))
}
}
if (numThreadsSpecified >= 2)
if (printCallback || freqCallback)
{
if (printCallback)
printCallback->NewLine();
/* it can show incorrect frequency for HT threads.
so we reduce freq test to (numCPUs / 2) */
UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified;
if (numThreads < 1)
numThreads = 1;
if (printCallback)
{
char s[128];
ConvertUInt64ToString(numThreads, s);
printCallback->Print(s);
printCallback->Print("T CPU Freq (MHz):");
}
UInt64 numMilCommands = 1 <<
#ifdef _DEBUG
7;
#else
10;
#endif
if (specifiedFreq != 0)
{
while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
numMilCommands >>= 1;
}
// for (int jj = 0;; jj++)
for (;;)
{
if (printCallback)
RINOK(printCallback->CheckBreak())
{
// PrintLeft(f, "CPU", kFieldSize_Name);
// UInt32 resVal;
CFreqBench fb;
fb.complexInCommands = numMilCommands * 1000000;
fb.numThreads = numThreads;
// showFreq;
// fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
fb.showFreq = true;
fb.specifiedFreq = 1;
const HRESULT res = fb.FreqBench(NULL /* printCallback */
#ifndef Z7_ST
, &affinityMode
#endif
);
RINOK(res)
if (freqCallback)
{
RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
}
if (printCallback)
{
/*
if (realDelta == 0)
{
printCallback->Print(" -");
}
else
*/
{
// PrintNumber(*printCallback, start, 0);
PrintUsage(*printCallback, fb.UsageRes, 3);
printCallback->Print("%");
PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
printCallback->Print(" ");
// PrintNumber(*printCallback, fb.UsageRes, 5);
}
}
}
// if (jj >= 1)
{
const bool needStop = (numMilCommands >= (1 <<
#ifdef _DEBUG
7
#else
11
#endif
));
if (needStop)
break;
numMilCommands <<= 1;
}
}
if (freqCallback)
{
RINOK(freqCallback->FreqsFinished(numThreads))
}
}
if (printCallback)
{
printCallback->NewLine();
printCallback->NewLine();
PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
printCallback->Print(GetProcessThreadsInfo(threadsInfo));
printCallback->NewLine();
}
if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
return E_INVALIDARG;
UInt64 dict = (UInt64)1 << startDicLog;
const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
const unsigned level = method.GetLevel();
AString &methodName = method.MethodName;
const AString original_MethodName = methodName;
if (methodName.IsEmpty())
methodName = "LZMA";
if (benchCallback)
{
CBenchProps benchProps;
benchProps.SetLzmaCompexity();
const UInt64 dictSize = method.Get_Lzma_DicSize();
size_t uncompressedDataSize;
if (use_fileData)
{
uncompressedDataSize = fileDataBuffer.Size();
}
else
{
uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
if (uncompressedDataSize < dictSize)
return E_INVALIDARG;
}
return MethodBench(
EXTERNAL_CODECS_LOC_VARS
complexInCommands,
#ifndef Z7_ST
true, numThreadsSpecified,
&affinityMode,
#endif
method,
uncompressedDataSize, (const Byte *)fileDataBuffer,
kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
}
if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
methodName = "crc32";
CMethodId hashID;
const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
int codecIndex = -1;
bool isFilter = false;
if (!isHashMethod)
{
UInt32 numStreams;
codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
true, // encode
hashID, numStreams, isFilter);
// we can allow non filter for BW tests
if (!isFilter) codecIndex = -1;
}
CBenchCallbackToPrint callback;
callback.Init();
callback._file = printCallback;
if (isHashMethod || codecIndex != -1)
{
if (!printCallback)
return S_FALSE;
IBenchPrintCallback &f = *printCallback;
UInt64 dict64 = dict;
if (!dictIsDefined)
dict64 = (1 << 27);
if (use_fileData)
{
if (!dictIsDefined)
dict64 = fileDataBuffer.Size();
else if (dict64 > fileDataBuffer.Size())
dict64 = fileDataBuffer.Size();
}
for (;;)
{
const int index = method.FindProp(NCoderPropID::kDictionarySize);
if (index < 0)
break;
method.Props.Delete((unsigned)index);
}
// methodName.RemoveChar(L'-');
Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
const UInt32 *checkSum = NULL;
int benchIndex = -1;
if (isHashMethod)
{
for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
{
const CBenchHash &h = g_Hash[i];
AString benchMethod (h.Name);
AString benchProps;
const int propPos = benchMethod.Find(':');
if (propPos >= 0)
{
benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
benchMethod.DeleteFrom((unsigned)propPos);
}
if (AreSameMethodNames(benchMethod, methodName))
{
const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
/*
bool isMainMethod = method.PropsString.IsEmpty();
if (isMainMethod)
isMainMethod = !checkSum
|| (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
if (sameProps || isMainMethod)
*/
{
complexity = (Int32)h.Complex;
checkSum = &h.CheckSum;
if (sameProps)
break;
/*
if property. is not specified, we use the complexity
for latest fastest method (crc32:64)
*/
}
}
}
// if (!checkSum) return E_NOTIMPL;
}
else
{
for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
{
const CBenchMethod &bench = g_Bench[i];
AString benchMethod (bench.Name);
AString benchProps;
const int propPos = benchMethod.Find(':');
if (propPos >= 0)
{
benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
benchMethod.DeleteFrom((unsigned)propPos);
}
if (AreSameMethodNames(benchMethod, methodName))
{
const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
// bool isMainMethod = method.PropsString.IsEmpty();
// if (sameProps || isMainMethod)
{
benchIndex = (int)i;
if (sameProps)
break;
}
}
}
// if (benchIndex < 0) return E_NOTIMPL;
}
{
/* we count usage only for crc and filter. non-filters are not supported */
UInt64 usage = (1 << 20);
UInt64 bufSize = dict64;
UInt32 numBlocks = isHashMethod ? 1 : 3;
if (use_fileData)
{
usage += fileDataBuffer.Size();
if (bufSize > fileDataBuffer.Size())
bufSize = fileDataBuffer.Size();
if (isHashMethod)
{
numBlocks = 0;
#ifndef Z7_ST
if (numThreadsSpecified != 1)
numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
#endif
}
}
usage += numThreadsSpecified * bufSize * numBlocks;
Print_Usage_and_Threads(f, usage, numThreadsSpecified);
}
CUIntVector numThreadsVector;
{
unsigned nt = numThreads_Start;
for (;;)
{
if (nt > numThreadsSpecified)
break;
numThreadsVector.Add(nt);
const unsigned next = nt * 2;
const UInt32 ntHalf= numThreadsSpecified / 2;
if (ntHalf > nt && ntHalf < next)
numThreadsVector.Add(ntHalf);
if (numThreadsSpecified > nt && numThreadsSpecified < next)
numThreadsVector.Add(numThreadsSpecified);
nt = next;
}
}
unsigned numColumns = isHashMethod ? 1 : 2;
CTempValues speedTotals;
CTempValues usageTotals;
{
const unsigned numItems = numThreadsVector.Size() * numColumns;
speedTotals.Alloc(numItems);
usageTotals.Alloc(numItems);
for (unsigned i = 0; i < numItems; i++)
{
speedTotals.Values[i] = 0;
usageTotals.Values[i] = 0;
}
}
f.NewLine();
for (unsigned line = 0; line < 3; line++)
{
f.NewLine();
f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size");
FOR_VECTOR (ti, numThreadsVector)
{
if (ti != 0)
Print_Delimiter(f);
if (line == 0)
{
PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
}
else
{
for (unsigned c = 0; c < numColumns; c++)
{
PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1);
PrintRight(f, line == 1 ? "BW" : "MB/s", kFieldSize_CrcSpeed + 1);
}
}
}
}
f.NewLine();
UInt64 numSteps = 0;
// for (UInt32 iter = 0; iter < numIterations; iter++)
// {
unsigned pow = 10; // kNumHashDictBits
if (startDicLog_Defined)
pow = startDicLog;
// #define NUM_SUB_BITS 2
// pow <<= NUM_SUB_BITS;
for (;; pow++)
{
const UInt64 bufSize = (UInt64)1 << pow;
// UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
// bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
size_t dataSize = fileDataBuffer.Size();
if (dataSize > bufSize || !use_fileData)
dataSize = (size_t)bufSize;
for (UInt32 iter = 0; iter < numIterations; iter++)
{
Print_Pow(f, pow);
// PrintNumber(f, bufSize >> 10, 4);
FOR_VECTOR (ti, numThreadsVector)
{
RINOK(f.CheckBreak())
const UInt32 numThreads = numThreadsVector[ti];
if (isHashMethod)
{
UInt64 speed = 0;
UInt64 usage = 0;
const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
numThreads,
dataSize, (const Byte *)fileDataBuffer,
speed, usage,
(UInt32)complexity,
1, // benchWeight,
(pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
method,
&f,
#ifndef Z7_ST
&affinityMode,
#endif
false, // showRating
NULL, false, 0);
RINOK(res)
if (ti != 0)
Print_Delimiter(f);
Bench_BW_Print_Usage_Speed(f, usage, speed);
speedTotals.Values[ti] += speed;
usageTotals.Values[ti] += usage;
}
else
{
{
unsigned keySize = 32;
if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
callback.BenchProps.KeySize = keySize;
}
COneMethodInfo method2 = method;
unsigned bench_DictBits;
if (benchIndex >= 0)
{
const CBenchMethod &bench = g_Bench[benchIndex];
callback.BenchProps.EncComplex = bench.EncComplex;
callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
bench_DictBits = bench.DictBits;
// bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
}
else
{
bench_DictBits = kOldLzmaDictBits; // = 32 default
if (isFilter)
{
const unsigned k_UnknownCoderComplexity = 4;
callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
}
else
{
callback.BenchProps.EncComplex = 1 << 10;
callback.BenchProps.DecComplexUnc = 1 << 6;
}
callback.BenchProps.DecComplexCompr = 0;
}
callback.NeedPrint = false;
if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
{
const NCOM::CPropVariant propVariant = (UInt32)pow;
RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
}
const HRESULT res = MethodBench(
EXTERNAL_CODECS_LOC_VARS
complexInCommands,
#ifndef Z7_ST
false, // oldLzmaBenchMode
numThreadsVector[ti],
&affinityMode,
#endif
method2,
dataSize, (const Byte *)fileDataBuffer,
bench_DictBits,
printCallback,
&callback,
&callback.BenchProps);
RINOK(res)
if (ti != 0)
Print_Delimiter(f);
for (unsigned i = 0; i < 2; i++)
{
const CBenchInfo &bi = callback.BenchInfo_Results[i];
const UInt64 usage = bi.GetUsage();
const UInt64 speed = bi.GetUnpackSizeSpeed();
usageTotals.Values[ti * 2 + i] += usage;
speedTotals.Values[ti * 2 + i] += speed;
Bench_BW_Print_Usage_Speed(f, usage, speed);
}
}
}
f.NewLine();
numSteps++;
}
if (dataSize >= dict64)
break;
}
if (numSteps != 0)
{
f.Print("Avg:");
for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
{
if (ti != 0)
Print_Delimiter(f);
for (unsigned i = 0; i < numColumns; i++)
Bench_BW_Print_Usage_Speed(f,
usageTotals.Values[ti * numColumns + i] / numSteps,
speedTotals.Values[ti * numColumns + i] / numSteps);
}
f.NewLine();
}
return S_OK;
}
bool use2Columns = false;
bool totalBenchMode = false;
bool onlyHashBench = false;
if (methodName.IsEqualTo_Ascii_NoCase("hash"))
{
onlyHashBench = true;
methodName = "*";
totalBenchMode = true;
}
else if (methodName.Find('*') >= 0)
totalBenchMode = true;
// ---------- Threads loop ----------
for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
{
UInt32 numThreads = numThreadsSpecified;
if (!multiThreadTests)
{
if (threadsPassIndex != 0)
break;
}
else
{
numThreads = 1;
if (threadsPassIndex != 0)
{
if (numCPUs < 2)
break;
numThreads = numCPUs;
if (threadsPassIndex == 1)
{
if (numCPUs >= 4)
numThreads = numCPUs / 2;
}
else if (numCPUs < 4)
break;
}
}
IBenchPrintCallback &f = *printCallback;
if (threadsPassIndex > 0)
{
f.NewLine();
f.NewLine();
}
if (!dictIsDefined && !onlyHashBench)
{
const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
unsigned dicSizeLog = dicSizeLog_Main;
#ifdef UNDER_CE
dicSizeLog = (UInt64)1 << 20;
#endif
if (ramSize_Defined)
for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
break;
dict = (UInt64)1 << dicSizeLog;
if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
{
f.Print("Dictionary reduced to: ");
PrintNumber(f, dicSizeLog, 1);
f.NewLine();
}
}
Print_Usage_and_Threads(f,
onlyHashBench ?
GetBenchMemoryUsage_Hash(numThreads, dict) :
GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
numThreads);
f.NewLine();
f.NewLine();
if (totalBenchMode)
{
callback.NameFieldSize = kFieldSize_Name;
use2Columns = false;
}
else
{
callback.NameFieldSize = kFieldSize_SmallName;
use2Columns = true;
}
callback.Use2Columns = use2Columns;
bool showFreq = false;
UInt64 cpuFreq = 0;
if (totalBenchMode)
{
showFreq = true;
}
unsigned fileldSize = kFieldSize_TotalSize;
if (showFreq)
fileldSize += kFieldSize_EUAndEffec;
if (use2Columns)
{
PrintSpaces(f, callback.NameFieldSize);
PrintRight(f, "Compressing", fileldSize);
f.Print(kSep);
PrintRight(f, "Decompressing", fileldSize);
}
f.NewLine();
PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
int j;
for (j = 0; j < 2; j++)
{
PrintRight(f, "Speed", kFieldSize_Speed + 1);
PrintRight(f, "Usage", kFieldSize_Usage + 1);
PrintRight(f, "R/U", kFieldSize_RU + 1);
PrintRight(f, "Rating", kFieldSize_Rating + 1);
if (showFreq)
{
PrintRight(f, "E/U", kFieldSize_EU + 1);
PrintRight(f, "Effec", kFieldSize_Effec + 1);
}
if (!use2Columns)
break;
if (j == 0)
f.Print(kSep);
}
f.NewLine();
PrintSpaces(f, callback.NameFieldSize);
for (j = 0; j < 2; j++)
{
PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
PrintRight(f, "%", kFieldSize_Usage + 1);
PrintRight(f, "MIPS", kFieldSize_RU + 1);
PrintRight(f, "MIPS", kFieldSize_Rating + 1);
if (showFreq)
{
PrintRight(f, "%", kFieldSize_EU + 1);
PrintRight(f, "%", kFieldSize_Effec + 1);
}
if (!use2Columns)
break;
if (j == 0)
f.Print(kSep);
}
f.NewLine();
f.NewLine();
if (specifiedFreq != 0)
cpuFreq = specifiedFreq;
// bool showTotalSpeed = false;
if (totalBenchMode)
{
for (UInt32 i = 0; i < numIterations; i++)
{
if (i != 0)
printCallback->NewLine();
const unsigned kNumCpuTests = 3;
for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
{
PrintLeft(f, "CPU", kFieldSize_Name);
// UInt32 resVal;
CFreqBench fb;
fb.complexInCommands = complexInCommands;
fb.numThreads = numThreads;
// showFreq;
fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
fb.specifiedFreq = specifiedFreq;
const HRESULT res = fb.FreqBench(printCallback
#ifndef Z7_ST
, &affinityMode
#endif
);
RINOK(res)
cpuFreq = fb.CpuFreqRes;
callback.NewLine();
if (specifiedFreq != 0)
cpuFreq = specifiedFreq;
if (testTimeMs >= 1000)
if (freqTest == kNumCpuTests - 1)
{
// SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
}
}
callback.NewLine();
// return S_OK; // change it
callback.SetFreq(true, cpuFreq);
if (!onlyHashBench)
{
size_t dataSize = (size_t)dict;
if (use_fileData)
{
dataSize = fileDataBuffer.Size();
if (dictIsDefined && dataSize > dict)
dataSize = (size_t)dict;
}
const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
method, complexInCommands,
#ifndef Z7_ST
numThreads,
&affinityMode,
#endif
dictIsDefined || use_fileData, // forceUnpackSize
dataSize,
(const Byte *)fileDataBuffer,
printCallback, &callback);
RINOK(res)
}
{
size_t dataSize = (size_t)1 << kNumHashDictBits;
if (dictIsDefined)
{
dataSize = (size_t)dict;
if (dataSize != dict)
return E_OUTOFMEMORY;
}
if (use_fileData)
{
dataSize = fileDataBuffer.Size();
if (dictIsDefined && dataSize > dict)
dataSize = (size_t)dict;
}
const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
method, complexInCommands,
numThreads,
dataSize, (const Byte *)fileDataBuffer,
printCallback, &callback,
#ifndef Z7_ST
&affinityMode,
#endif
&callback.EncodeRes, true, cpuFreq);
RINOK(res)
}
callback.NewLine();
{
PrintLeft(f, "CPU", kFieldSize_Name);
CFreqBench fb;
fb.complexInCommands = complexInCommands;
fb.numThreads = numThreads;
// showFreq;
fb.showFreq = (specifiedFreq != 0);
fb.specifiedFreq = specifiedFreq;
const HRESULT res = fb.FreqBench(printCallback
#ifndef Z7_ST
, &affinityMode
#endif
);
RINOK(res)
callback.NewLine();
}
}
}
else
{
needSetComplexity = true;
if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
{
unsigned i;
for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
{
const CBenchMethod &h = g_Bench[i];
AString benchMethod (h.Name);
AString benchProps;
const int propPos = benchMethod.Find(':');
if (propPos >= 0)
{
benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
benchMethod.DeleteFrom((unsigned)propPos);
}
if (AreSameMethodNames(benchMethod, methodName))
{
if (benchProps.IsEmpty()
|| (benchProps == "x5" && method.PropsString.IsEmpty())
|| method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
{
callback.BenchProps.EncComplex = h.EncComplex;
callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
needSetComplexity = false;
break;
}
}
}
/*
if (i == Z7_ARRAY_SIZE(g_Bench))
return E_NOTIMPL;
*/
}
if (needSetComplexity)
callback.BenchProps.SetLzmaCompexity();
if (startDicLog < kBenchMinDicLogSize)
startDicLog = kBenchMinDicLogSize;
for (unsigned i = 0; i < numIterations; i++)
{
unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
if (!multiDict)
pow = 32;
while (GetDictSizeFromLog(pow) > dict && pow > 0)
pow--;
for (; GetDictSizeFromLog(pow) <= dict; pow++)
{
Print_Pow(f, pow);
callback.DictSize = (UInt64)1 << pow;
COneMethodInfo method2 = method;
if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
{
// Append a dictionary size property for the current pass.
// As a result, method2 can contain two different dictionary size properties;
// the last one is the main (effective) property.
NCOM::CPropVariant propVariant = (UInt32)pow;
RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
}
size_t uncompressedDataSize;
if (use_fileData)
{
uncompressedDataSize = fileDataBuffer.Size();
}
else
{
uncompressedDataSize = (size_t)callback.DictSize;
if (uncompressedDataSize != callback.DictSize)
return E_OUTOFMEMORY;
if (uncompressedDataSize >= (1 << 18))
uncompressedDataSize += kAdditionalSize;
}
const HRESULT res = MethodBench(
EXTERNAL_CODECS_LOC_VARS
complexInCommands,
#ifndef Z7_ST
true, numThreads,
&affinityMode,
#endif
method2,
uncompressedDataSize, (const Byte *)fileDataBuffer,
kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
f.NewLine();
RINOK(res)
if (!multiDict)
break;
}
}
}
PrintChars(f, '-', callback.NameFieldSize + fileldSize);
if (use2Columns)
{
f.Print(kSep);
PrintChars(f, '-', fileldSize);
}
f.NewLine();
if (use2Columns)
{
PrintLeft(f, "Avr:", callback.NameFieldSize);
PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
f.Print(kSep);
PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
f.NewLine();
}
PrintLeft(f, "Tot:", callback.NameFieldSize);
CTotalBenchRes midRes;
midRes = callback.EncodeRes;
midRes.Update_With_Res(callback.DecodeRes);
// midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
PrintTotals(f, showFreq, cpuFreq, false, midRes);
f.NewLine();
}
return S_OK;
}