diff --git a/snappy-internal.h b/snappy-internal.h
index f88577c..e7adfbf 100644
--- a/snappy-internal.h
+++ b/snappy-internal.h
@@ -97,6 +97,19 @@ char* CompressFragment(const char* input,
 // Separate implementation for 64-bit, little-endian cpus.
 #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
     (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
+
+#if defined(ARCH_ARM)
+// Issues a PRFM PLDL1STRM hint (prefetch for load, L1, streaming) for the
+// address held in |data|. The address is only passed to the instruction in a
+// register; this function itself never reads through |data|.
+static inline void Prefetch(const void* data) {
+  __asm__ __volatile__(
+    "prfm PLDL1STRM, [%[data]] \n\t"
+    :: [data] "r" (data)
+  );
+}
+#endif  // defined(ARCH_ARM)
+
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
                                                       const char* s2_limit,
@@ -104,6 +117,14 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
   assert(s2_limit >= s2);
   size_t matched = 0;
 
+#if defined(ARCH_ARM)
+  // Hint the cache about the data 256 bytes ahead of both compare cursors.
+  // NOTE(review): neither address is checked against s2_limit -- confirm that
+  // forming and hinting an address past the end of the input is acceptable.
+  Prefetch(s1 + 256);
+  Prefetch(s2 + 256);
+#endif  // defined(ARCH_ARM)
+
   // This block isn't necessary for correctness; we could just start looping
   // immediately.  As an optimization though, it is useful.  It creates some not
   // uncommon code paths that determine, without extra effort, whether the match