From 6d1594e8918466be267c2d885bbf4d3c9c2846ff Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Fri, 3 Dec 2021 12:10:15 +0000
Subject: [PATCH] runtime: Use rep stos for memclr -- no vectors or nontemporal stores.

---
 src/runtime/memclr_amd64.s | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s
index 700bbd7b9b..8bffa7b192 100644
--- a/src/runtime/memclr_amd64.s
+++ b/src/runtime/memclr_amd64.s
@@ -17,6 +17,23 @@ TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-16
 	MOVQ	AX, DI	// DI = ptr
 	XORQ	AX, AX
 
+#ifndef hack
+	// Clear using string stores only: no vector registers and no
+	// nontemporal stores.  On entry to this path:
+	//	DI = ptr
+	//	BX = n
+	//	AX = 0
+	// The bulk is cleared eight bytes at a time and the remaining 0..7
+	// bytes one at a time, so the store width never depends on the low
+	// bits of n.  When n is a multiple of 8 the byte tail is empty and
+	// every store is a quadword store.  Either REP is a no-op if its
+	// count is zero, so n == 0 needs no special case.
+	CLD			// DF = 0 so the string stores advance DI
+	MOVQ	BX, CX
+	SHRQ	$3, CX		// CX = n / 8 (quadword count)
+	REP
+	STOSQ			// for (; CX != 0; CX--) { *(uint64 *)DI = AX; DI += 8; }
+	MOVQ	BX, CX
+	ANDQ	$7, CX		// CX = n % 8 (trailing byte count)
+	REP
+	STOSB			// for (; CX != 0; CX--) *DI++ = AL;
+	RET
+#else
 	// MOVOU seems always faster than REP STOSQ.
 tail:
 	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
@@ -177,3 +194,4 @@ _129through256:
 	MOVOU	X15, -32(DI)(BX*1)
 	MOVOU	X15, -16(DI)(BX*1)
 	RET
+#endif