Skip to content

Commit

Permalink
arm64: lib: memory utilities optimization
Browse files Browse the repository at this point in the history
Optimize memcpy and memmove to prefetch several cache lines.
We can achieve a 15% memcpy speed improvement using this preload method.
  • Loading branch information
Hong-Mei Li authored and Ristovski committed Nov 18, 2023
1 parent 7ea39c5 commit 631860a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
2 changes: 2 additions & 0 deletions arch/arm64/lib/copy_template.S
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ C_h .req x12
D_l .req x13
D_h .req x14

prfm pldl1strm, [src, #(1*L1_CACHE_BYTES)]
mov dst, dstin
cmp count, #16
/* When the memory length is less than 16, the accesses are not aligned. */
Expand Down Expand Up @@ -169,6 +170,7 @@ D_h .req x14
ldp1 C_l, C_h, src, #16
stp1 D_l, D_h, dst, #16
ldp1 D_l, D_h, src, #16
prfm pldl1strm, [src, #(4*L1_CACHE_BYTES)]
subs count, count, #64
b.ge 1b
stp1 A_l, A_h, dst, #16
Expand Down
2 changes: 2 additions & 0 deletions arch/arm64/lib/memmove.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ D_h .req x14

SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_WEAK_PI(memmove)
prfm pldl1strm, [src, #L1_CACHE_BYTES]
cmp dstin, src
b.lo __memcpy
add tmp1, src, count
Expand Down Expand Up @@ -173,6 +174,7 @@ SYM_FUNC_START_WEAK_PI(memmove)
ldp C_l, C_h, [src, #-48]
stp D_l, D_h, [dst, #-64]!
ldp D_l, D_h, [src, #-64]!
prfm pldl1strm, [src, #(4*L1_CACHE_BYTES)]
subs count, count, #64
b.ge 1b
stp A_l, A_h, [dst, #-16]
Expand Down

0 comments on commit 631860a

Please sign in to comment.