Make UNALIGNED_LOAD16/32 on ARMv7 go through an explicitly unaligned struct,

to avoid the compiler coalescing multiple loads into a single load instruction
(which only works for aligned accesses).

A typical example where GCC would coalesce:

  uint8* p = ...;
  uint32 a = UNALIGNED_LOAD32(p);
  uint32 b = UNALIGNED_LOAD32(p + 4);
  uint32 c = a | b;
This commit is contained in:
Steinar H. Gunderson 2016-01-04 12:51:31 +01:00
parent 96a2e340f3
commit ef5598aa0e
1 changed files with 35 additions and 4 deletions

View File

@ -116,6 +116,15 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
// sub-architectures. // sub-architectures.
// //
// This is a mess, but there's not much we can do about it. // This is a mess, but there's not much we can do about it.
//
// To further complicate matters, only LDR instructions (single reads) are
// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
// explicitly tell the compiler that these accesses can be unaligned, it can and
// will combine accesses. On armcc, this is signaled by accessing
// through the type (uint32 __packed *), but GCC has no such attribute
// (it ignores __attribute__((packed)) on individual variables). However,
// we can tell it that a _struct_ is unaligned, which has the same effect,
// so we do that.
#elif defined(__arm__) && \ #elif defined(__arm__) && \
!defined(__ARM_ARCH_4__) && \ !defined(__ARM_ARCH_4__) && \
@ -131,11 +140,33 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
!defined(__ARM_ARCH_6ZK__) && \ !defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__) !defined(__ARM_ARCH_6T2__)
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) namespace base {
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) namespace internal {
#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) struct Unaligned16Struct {
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) uint16 value;
uint8 dummy; // To make the size non-power-of-two.
} ATTRIBUTE_PACKED;
// Wrapper struct holding a single uint32 `value`. UNALIGNED_LOAD32 and
// UNALIGNED_STORE32 reinterpret an arbitrary pointer as a pointer to this
// struct and access only `value`; `dummy` is never read or written by them.
// NOTE(review): ATTRIBUTE_PACKED is defined elsewhere; presumably it expands
// to the compiler's packed attribute so the struct has no alignment
// requirement -- confirm against its definition.
struct Unaligned32Struct {
uint32 value;
uint8 dummy; // To make the size non-power-of-two.
} ATTRIBUTE_PACKED;
} // namespace internal
} // namespace base
// Unaligned 16/32-bit loads and stores. Each macro reinterprets _p as a
// pointer to the matching ::base::internal::Unaligned{16,32}Struct and
// accesses only its `value` member (the struct's `dummy` byte is never
// touched), so the access goes through the packed struct type rather than
// through a plain uint16/uint32 pointer.
//
// Each macro argument is evaluated exactly once. The LOAD macros yield the
// value read; the STORE macros are assignment expressions.
//
// Note the space in "reinterpret_cast< ::base" in the STORE macros: without
// it, "<::" is tokenized as the digraph "<:" followed by ":" by pre-C++11
// compilers, which breaks the build there. The LOAD macros are unaffected
// because "const " separates '<' from "::".
#define UNALIGNED_LOAD16(_p) \
  ((reinterpret_cast<const ::base::internal::Unaligned16Struct *>(_p))->value)
#define UNALIGNED_LOAD32(_p) \
  ((reinterpret_cast<const ::base::internal::Unaligned32Struct *>(_p))->value)
#define UNALIGNED_STORE16(_p, _val) \
  ((reinterpret_cast< ::base::internal::Unaligned16Struct *>(_p))->value = \
       (_val))
#define UNALIGNED_STORE32(_p, _val) \
  ((reinterpret_cast< ::base::internal::Unaligned32Struct *>(_p))->value = \
       (_val))
// TODO(user): NEON supports unaligned 64-bit loads and stores. // TODO(user): NEON supports unaligned 64-bit loads and stores.
// See if that would be more efficient on platforms supporting it, // See if that would be more efficient on platforms supporting it,