diff --git a/snappy-internal.h b/snappy-internal.h
index 4b53d59..81c613d 100644
--- a/snappy-internal.h
+++ b/snappy-internal.h
@@ -84,7 +84,7 @@ char* CompressFragment(const char* input,
 //
 // Separate implementation for 64-bit, little-endian cpus.
 #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
-    (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
+    (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS))
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
                                                       const char* s2_limit) {
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
index f834bdb..cfc048b 100644
--- a/snappy-stubs-internal.h
+++ b/snappy-stubs-internal.h
@@ -80,6 +80,10 @@
 
 #define ARCH_ARM 1
 
+#elif defined(__mips__)
+
+#define ARCH_MIPS 1
+
 #endif
 
 // Needed by OS X, among others.
@@ -212,7 +216,77 @@ inline uint64 UNALIGNED_LOAD64(const void *p) {
 inline void UNALIGNED_STORE64(void *p, uint64 v) {
   memcpy(p, &v, sizeof v);
 }
 
+#elif defined(__mips64)
+inline uint16 UNALIGNED_LOAD16(const void *p) {
+  uint16 t;
+  __asm__ volatile (
+    ".set noat              \n\t"
+    "lb  %[t], 0x0(%[p])    \n\t"
+    "lb  $1,   0x1(%[p])    \n\t"
+    "ins %[t], $1, 8, 8     \n\t"
+    : [t]"=&r"(t)
+    : [p]"r"(p)
+    :
+  );
+  return t;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16 v) {
+  __asm__ volatile (
+    ".set noat              \n\t"
+    "sb  %[v], 0x0(%[p])    \n\t"
+    "srl $1,   %[v], 8      \n\t"
+    "sb  $1,   0x1(%[p])    \n\t"
+    :
+    : [p]"r"(p), [v]"r"(v)
+    :
+  );
+}
+
+inline uint32 UNALIGNED_LOAD32(const void *p) {
+  uint32 t;
+  __asm__ volatile (
+    "lwl %[t], 0x3(%[p])    \n\t"
+    "lwr %[t], 0x0(%[p])    \n\t"
+    : [t]"=&r"(t)
+    : [p]"r"(p)
+    :
+  );
+  return t;
+}
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  __asm__ volatile (
+    "ldl %[temp], 0x7(%[p]) \n\t"
+    "ldr %[temp], 0x0(%[p]) \n\t"
+    : [temp]"=&r"(t)
+    : [p]"r"(p)
+    :
+  );
+  return t;
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32 v) {
+  __asm__ volatile (
+    "swl %[v], 0x3(%[p])    \n\t"
+    "swr %[v], 0x0(%[p])    \n\t"
+    :
+    : [p]"r"(p), [v]"r"(v)
+    :
+  );
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  __asm__ volatile (
+    "sdl %[v], 0x7(%[p])    \n\t"
+    "sdr %[v], 0x0(%[p])    \n\t"
+    :
+    : [p]"r"(p), [v]"r"(v)
+    :
+  );
+}
 #else
 
 // These functions are provided for architectures that don't support
@@ -343,7 +417,6 @@ class LittleEndian {
   static bool IsLittleEndian() { return true; }
 #endif  // !defined(SNAPPY_IS_BIG_ENDIAN)
 
-
   // Functions to do unaligned loads and stores in little-endian order.
   static uint16 Load16(const void *p) {
     return ToHost16(UNALIGNED_LOAD16(p));
@@ -373,9 +446,9 @@ class Bits {
   // that it's 0-indexed.
   static int FindLSBSetNonZero(uint32 n);
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS)
   static int FindLSBSetNonZero64(uint64 n);
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS)
 
  private:
   // No copying
@@ -393,11 +466,11 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
   return __builtin_ctz(n);
 }
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS)
 inline int Bits::FindLSBSetNonZero64(uint64 n) {
   return __builtin_ctzll(n);
 }
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS)
 
 #elif defined(_MSC_VER)
 
@@ -422,7 +495,7 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
   if (_BitScanForward64(&where, n)) return static_cast<int>(where);
   return 64;
 }
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) || defined(ARCH_MIPS)
 
 #else  // Portable versions.
 
diff --git a/snappy.cc b/snappy.cc
index e594bb9..00b8e5b 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -102,9 +102,22 @@ size_t MaxCompressedLength(size_t source_len) {
 namespace {
 
 void UnalignedCopy64(const void* src, void* dst) {
+#if defined(__mips64)
+  __asm__ volatile (
+    ".set noat              \n\t"
+    "ldl $1, 0x7(%[src])    \n\t"
+    "ldr $1, 0x0(%[src])    \n\t"
+    "sdl $1, 0x7(%[dst])    \n\t"
+    "sdr $1, 0x0(%[dst])    \n\t"
+    :
+    : [src]"r"(src), [dst]"r"(dst)
+    :
+  );
+#else
   char tmp[8];
   memcpy(tmp, src, 8);
   memcpy(dst, tmp, 8);
+#endif
 }
 
 void UnalignedCopy128(const void* src, void* dst) {
@@ -459,7 +472,7 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
 // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
 // done at GetUint32AtOffset() time.
 
-#ifdef ARCH_K8
+#if defined(ARCH_K8) || defined(ARCH_ARM) || defined(ARCH_MIPS)
 
 typedef uint64 EightBytesReference;
 
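
For anyone who wants to spot-check the ldl/ldr idiom outside the library, below is a minimal standalone sketch; it is not part of the patch. It mirrors the UNALIGNED_LOAD64 asm added to snappy-stubs-internal.h and compares it against a memcpy reference at every misalignment. The LoadUnaligned64 name, the __mips_isa_rev guard, and the test scaffolding are assumptions made for this sketch only. Note that ldl/ldr (and sdl/sdr) were removed in MIPS Release 6, so this path, like the patch itself, targets pre-R6 MIPS64 cores; on other targets the sketch falls back to memcpy so it still builds and runs.

// Sanity check for the ldl/ldr-based unaligned 64-bit load (sketch only).
#include <cstdint>
#include <cstdio>
#include <cstring>

static inline uint64_t LoadUnaligned64(const void* p) {
#if defined(__mips64) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6)
  uint64_t t;
  __asm__ volatile (
    "ldl %[t], 0x7(%[p])  \n\t"   // load the "left" (most significant) part
    "ldr %[t], 0x0(%[p])  \n\t"   // merge in the "right" (least significant) part
    : [t]"=&r"(t)
    : [p]"r"(p)
    : "memory");
  return t;
#else
  uint64_t t;
  std::memcpy(&t, p, sizeof t);   // portable fallback for non-MIPS / R6 builds
  return t;
#endif
}

int main() {
  unsigned char buf[16];
  for (int i = 0; i < 16; ++i) buf[i] = static_cast<unsigned char>(i);

  // Read at every offset 0..7 and compare against a memcpy reference.
  for (int off = 0; off < 8; ++off) {
    uint64_t ref;
    std::memcpy(&ref, buf + off, sizeof ref);
    uint64_t got = LoadUnaligned64(buf + off);
    std::printf("offset %d: %s\n", off, got == ref ? "ok" : "MISMATCH");
  }
  return 0;
}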