-
Notifications
You must be signed in to change notification settings - Fork 0
/
fast_copy_memcpy.cpp
33 lines (27 loc) · 1.07 KB
/
fast_copy_memcpy.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Copy nBytes bytes from pvSrc to pvDest.
 *
 * On x86-64 the bulk of the buffer is written in 8-byte words with
 * non-temporal (streaming) stores via _mm_stream_si64; any remaining bytes
 * (and the whole buffer when it is shorter than one word) are copied with
 * std::copy. On other targets, where _mm_stream_si64 is not available, the
 * whole buffer is copied with std::copy.
 *
 * pvDest  - destination buffer, at least nBytes long
 * pvSrc   - source buffer, at least nBytes long (only read, never written)
 * nBytes  - number of bytes to copy; 0 is allowed and copies nothing
 *
 * Overlapping source/destination regions get no memmove-style handling.
 * No attempt is made to align the destination before streaming - WIP.
 */
void memcpy1(void *pvDest, void *pvSrc, size_t nBytes) {
    const char* src = static_cast<const char*>(pvSrc);
    char* dst = static_cast<char*>(pvDest);
    size_t copied = 0;

#if defined(__x86_64__) || defined(_M_X64)
    /* Below is real gain: stream whole 8-byte words past the cache. */
    const size_t kWord = sizeof(long long);
    const size_t bulk = nBytes - nBytes % kWord;
    for (; copied < bulk; copied += kWord) {
        /* Load the word byte-wise: pvSrc may be unaligned and need not point
           at long long objects, so dereferencing a cast pointer would be
           undefined behaviour. */
        long long word;
        std::copy(src + copied, src + copied + kWord,
                  reinterpret_cast<char*>(&word));
        _mm_stream_si64(reinterpret_cast<long long*>(dst + copied), word);
    }
    if (bulk != 0) {
        /* Streaming stores are weakly ordered; fence so they become visible
           before any store the caller issues after this function returns. */
        _mm_sfence();
    }
#endif

    /* Tail shorter than one word, or the entire buffer when nothing was
       streamed above. This code can be faster - WIP. */
    std::copy(src + copied, src + nBytes, dst + copied);
}