PPC.cpp 15.3 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
//===- PPC.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Thunks.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
class PPC final : public TargetInfo {
public:
  PPC();
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotHeader(uint8_t *buf) const override;
  void writePltHeader(uint8_t *buf) const override {
    llvm_unreachable("should call writePPC32GlinkSection() instead");
  }
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override {
    llvm_unreachable("should call writePPC32GlinkSection() instead");
  }
  void writeIplt(uint8_t *buf, const Symbol &sym,
                 uint64_t pltEntryAddr) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
                          RelExpr expr) const override;
  int getTlsGdRelaxSkip(RelType type) const override;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
};
} // namespace

static uint16_t lo(uint32_t v) { return v; }
static uint16_t ha(uint32_t v) { return (v + 0x8000) >> 16; }

static uint32_t readFromHalf16(const uint8_t *loc) {
  return read32(config->isLE ? loc : loc - 2);
}

static void writeFromHalf16(uint8_t *loc, uint32_t insn) {
  write32(config->isLE ? loc : loc - 2, insn);
}

void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
  // Create canonical PLT entries for non-PIE code. Compilers don't generate
  // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE.
  uint32_t glink = in.plt->getVA(); // VA of .glink
  if (!config->isPic) {
    for (const Symbol *sym : cast<PPC32GlinkSection>(in.plt)->canonical_plts) {
      writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0);
      buf += 16;
      glink += 16;
    }
  }

  // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an
  // absolute address from a specific .plt slot (usually called .got.plt on
  // other targets) and jumps there.
  //
  // a) With immediate binding (BIND_NOW), the .plt entry is resolved at load
  // time. The .glink section is not used.
  // b) With lazy binding, the .plt entry points to a `b PLTresolve`
  // instruction in .glink, filled in by PPC::writeGotPlt().

  // Write N `b PLTresolve` first.
  for (size_t i = 0; i != numEntries; ++i)
    write32(buf + 4 * i, 0x48000000 | 4 * (numEntries - i));
  buf += 4 * numEntries;

  // Then write PLTresolve(), which has two forms: PIC and non-PIC. PLTresolve()
  // computes the PLT index (by computing the distance from the landing b to
  // itself) and calls _dl_runtime_resolve() (in glibc).
  uint32_t got = in.got->getVA();
  const uint8_t *end = buf + 64;
  if (config->isPic) {
    uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12;
    uint32_t gotBcl = got + 4 - (glink + afterBcl);
    write32(buf + 0, 0x3d6b0000 | ha(afterBcl));  // addis r11,r11,1f-glink@ha
    write32(buf + 4, 0x7c0802a6);                 // mflr r0
    write32(buf + 8, 0x429f0005);                 // bcl 20,30,.+4
    write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l
    write32(buf + 16, 0x7d8802a6);                // mflr r12
    write32(buf + 20, 0x7c0803a6);                // mtlr r0
    write32(buf + 24, 0x7d6c5850);                // sub r11,r11,r12
    write32(buf + 28, 0x3d8c0000 | ha(gotBcl));   // addis 12,12,GOT+4-1b@ha
    if (ha(gotBcl) == ha(gotBcl + 4)) {
      write32(buf + 32, 0x800c0000 | lo(gotBcl)); // lwz r0,r12,GOT+4-1b@l(r12)
      write32(buf + 36,
              0x818c0000 | lo(gotBcl + 4));       // lwz r12,r12,GOT+8-1b@l(r12)
    } else {
      write32(buf + 32, 0x840c0000 | lo(gotBcl)); // lwzu r0,r12,GOT+4-1b@l(r12)
      write32(buf + 36, 0x818c0000 | 4);          // lwz r12,r12,4(r12)
    }
    write32(buf + 40, 0x7c0903a6);                // mtctr 0
    write32(buf + 44, 0x7c0b5a14);                // add r0,11,11
    write32(buf + 48, 0x7d605a14);                // add r11,0,11
    write32(buf + 52, 0x4e800420);                // bctr
    buf += 56;
  } else {
    write32(buf + 0, 0x3d800000 | ha(got + 4));   // lis     r12,GOT+4@ha
    write32(buf + 4, 0x3d6b0000 | ha(-glink));    // addis   r11,r11,-glink@ha
    if (ha(got + 4) == ha(got + 8))
      write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12)
    else
      write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12)
    write32(buf + 12, 0x396b0000 | lo(-glink));   // addi    r11,r11,-glink@l
    write32(buf + 16, 0x7c0903a6);                // mtctr   r0
    write32(buf + 20, 0x7c0b5a14);                // add     r0,r11,r11
    if (ha(got + 4) == ha(got + 8))
      write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12)
    else
      write32(buf + 24, 0x818c0000 | 4);          // lwz r12,4(r12)
    write32(buf + 28, 0x7d605a14);                // add     r11,r0,r11
    write32(buf + 32, 0x4e800420);                // bctr
    buf += 36;
  }

  // Pad with nop. They should not be executed.
  for (; buf < end; buf += 4)
    write32(buf, 0x60000000);
}

PPC::PPC() {
  copyRel = R_PPC_COPY;
  gotRel = R_PPC_GLOB_DAT;
  noneRel = R_PPC_NONE;
  pltRel = R_PPC_JMP_SLOT;
  relativeRel = R_PPC_RELATIVE;
  iRelativeRel = R_PPC_IRELATIVE;
  symbolicRel = R_PPC_ADDR32;
  gotBaseSymInGotPlt = false;
  gotHeaderEntriesNum = 3;
  gotPltHeaderEntriesNum = 0;
  pltHeaderSize = 0;
  pltEntrySize = 4;
  ipltEntrySize = 16;

  needsThunks = true;

  tlsModuleIndexRel = R_PPC_DTPMOD32;
  tlsOffsetRel = R_PPC_DTPREL32;
  tlsGotRel = R_PPC_TPREL32;

  defaultMaxPageSize = 65536;
  defaultImageBase = 0x10000000;

  write32(trapInstr.data(), 0x7fe00008);
}

void PPC::writeIplt(uint8_t *buf, const Symbol &sym,
                    uint64_t /*pltEntryAddr*/) const {
  // In -pie or -shared mode, assume r30 points to .got2+0x8000, and use a
  // .got2.plt_pic32. thunk.
  writePPC32PltCallStub(buf, sym.getGotPltVA(), sym.file, 0x8000);
}

void PPC::writeGotHeader(uint8_t *buf) const {
  // _GLOBAL_OFFSET_TABLE_[0] = _DYNAMIC
  // glibc stores _dl_runtime_resolve in _GLOBAL_OFFSET_TABLE_[1],
  // link_map in _GLOBAL_OFFSET_TABLE_[2].
  write32(buf, mainPart->dynamic->getVA());
}

void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Address of the symbol resolver stub in .glink .
  write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex);
}

bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                     uint64_t branchAddr, const Symbol &s, int64_t a) const {
  if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24)
    return false;
  if (s.isInPlt())
    return true;
  if (s.isUndefWeak())
    return false;
  return !PPC::inBranchRange(type, branchAddr, s.getVA(a));
}

uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; }

bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  uint64_t offset = dst - src;
  if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24)
    return isInt<26>(offset);
  llvm_unreachable("unsupported relocation type used in branch");
}

RelExpr PPC::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_PPC_NONE:
    return R_NONE;
  case R_PPC_ADDR16_HA:
  case R_PPC_ADDR16_HI:
  case R_PPC_ADDR16_LO:
  case R_PPC_ADDR32:
    return R_ABS;
  case R_PPC_DTPREL16:
  case R_PPC_DTPREL16_HA:
  case R_PPC_DTPREL16_HI:
  case R_PPC_DTPREL16_LO:
  case R_PPC_DTPREL32:
    return R_DTPREL;
  case R_PPC_REL14:
  case R_PPC_REL32:
  case R_PPC_REL16_LO:
  case R_PPC_REL16_HI:
  case R_PPC_REL16_HA:
    return R_PC;
  case R_PPC_GOT16:
    return R_GOT_OFF;
  case R_PPC_LOCAL24PC:
  case R_PPC_REL24:
    return R_PLT_PC;
  case R_PPC_PLTREL24:
    return R_PPC32_PLTREL;
  case R_PPC_GOT_TLSGD16:
    return R_TLSGD_GOT;
  case R_PPC_GOT_TLSLD16:
    return R_TLSLD_GOT;
  case R_PPC_GOT_TPREL16:
    return R_GOT_OFF;
  case R_PPC_TLS:
    return R_TLSIE_HINT;
  case R_PPC_TLSGD:
    return R_TLSDESC_CALL;
  case R_PPC_TLSLD:
    return R_TLSLD_HINT;
  case R_PPC_TPREL16:
  case R_PPC_TPREL16_HA:
  case R_PPC_TPREL16_LO:
  case R_PPC_TPREL16_HI:
    return R_TLS;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

RelType PPC::getDynRel(RelType type) const {
  if (type == R_PPC_ADDR32)
    return type;
  return R_PPC_NONE;
}

static std::pair<RelType, uint64_t> fromDTPREL(RelType type, uint64_t val) {
  uint64_t dtpBiasedVal = val - 0x8000;
  switch (type) {
  case R_PPC_DTPREL16:
    return {R_PPC64_ADDR16, dtpBiasedVal};
  case R_PPC_DTPREL16_HA:
    return {R_PPC_ADDR16_HA, dtpBiasedVal};
  case R_PPC_DTPREL16_HI:
    return {R_PPC_ADDR16_HI, dtpBiasedVal};
  case R_PPC_DTPREL16_LO:
    return {R_PPC_ADDR16_LO, dtpBiasedVal};
  case R_PPC_DTPREL32:
    return {R_PPC_ADDR32, dtpBiasedVal};
  default:
    return {type, val};
  }
}

void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  RelType newType;
  std::tie(newType, val) = fromDTPREL(rel.type, val);
  switch (newType) {
  case R_PPC_ADDR16:
    checkIntUInt(loc, val, 16, rel);
    write16(loc, val);
    break;
  case R_PPC_GOT16:
  case R_PPC_GOT_TLSGD16:
  case R_PPC_GOT_TLSLD16:
  case R_PPC_GOT_TPREL16:
  case R_PPC_TPREL16:
    checkInt(loc, val, 16, rel);
    write16(loc, val);
    break;
  case R_PPC_ADDR16_HA:
  case R_PPC_DTPREL16_HA:
  case R_PPC_GOT_TLSGD16_HA:
  case R_PPC_GOT_TLSLD16_HA:
  case R_PPC_GOT_TPREL16_HA:
  case R_PPC_REL16_HA:
  case R_PPC_TPREL16_HA:
    write16(loc, ha(val));
    break;
  case R_PPC_ADDR16_HI:
  case R_PPC_DTPREL16_HI:
  case R_PPC_GOT_TLSGD16_HI:
  case R_PPC_GOT_TLSLD16_HI:
  case R_PPC_GOT_TPREL16_HI:
  case R_PPC_REL16_HI:
  case R_PPC_TPREL16_HI:
    write16(loc, val >> 16);
    break;
  case R_PPC_ADDR16_LO:
  case R_PPC_DTPREL16_LO:
  case R_PPC_GOT_TLSGD16_LO:
  case R_PPC_GOT_TLSLD16_LO:
  case R_PPC_GOT_TPREL16_LO:
  case R_PPC_REL16_LO:
  case R_PPC_TPREL16_LO:
    write16(loc, val);
    break;
  case R_PPC_ADDR32:
  case R_PPC_REL32:
    write32(loc, val);
    break;
  case R_PPC_REL14: {
    uint32_t mask = 0x0000FFFC;
    checkInt(loc, val, 16, rel);
    checkAlignment(loc, val, 4, rel);
    write32(loc, (read32(loc) & ~mask) | (val & mask));
    break;
  }
  case R_PPC_REL24:
  case R_PPC_LOCAL24PC:
  case R_PPC_PLTREL24: {
    uint32_t mask = 0x03FFFFFC;
    checkInt(loc, val, 26, rel);
    checkAlignment(loc, val, 4, rel);
    write32(loc, (read32(loc) & ~mask) | (val & mask));
    break;
  }
  default:
    llvm_unreachable("unknown relocation");
  }
}

RelExpr PPC::adjustRelaxExpr(RelType type, const uint8_t *data,
                             RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE)
    return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
  if (expr == R_RELAX_TLS_LD_TO_LE)
    return R_RELAX_TLS_LD_TO_LE_ABS;
  return expr;
}

int PPC::getTlsGdRelaxSkip(RelType type) const {
  // A __tls_get_addr call instruction is marked with 2 relocations:
  //
  //   R_PPC_TLSGD / R_PPC_TLSLD: marker relocation
  //   R_PPC_REL24: __tls_get_addr
  //
  // After the relaxation we no longer call __tls_get_addr and should skip both
  // relocations to not create a false dependence on __tls_get_addr being
  // defined.
  if (type == R_PPC_TLSGD || type == R_PPC_TLSLD)
    return 2;
  return 1;
}

void PPC::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_PPC_GOT_TLSGD16: {
    // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA)
    uint32_t insn = readFromHalf16(loc);
    writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000));
    relocateNoSym(loc, R_PPC_GOT_TPREL16, val);
    break;
  }
  case R_PPC_TLSGD:
    // bl __tls_get_addr(x@tldgd) --> add r3, r3, r2
    write32(loc, 0x7c631214);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}

void PPC::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_PPC_GOT_TLSGD16:
    // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha
    writeFromHalf16(loc, 0x3c620000 | ha(val));
    break;
  case R_PPC_TLSGD:
    // bl __tls_get_addr(x@tldgd) --> add r3, r3, x@tprel@l
    write32(loc, 0x38630000 | lo(val));
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}

void PPC::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_PPC_GOT_TLSLD16:
    // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0
    writeFromHalf16(loc, 0x3c620000);
    break;
  case R_PPC_TLSLD:
    // r3+x@dtprel computes r3+x-0x8000, while we want it to compute r3+x@tprel
    // = r3+x-0x7000, so add 4096 to r3.
    // bl __tls_get_addr(x@tlsld) --> addi r3, r3, 4096
    write32(loc, 0x38631000);
    break;
  case R_PPC_DTPREL16:
  case R_PPC_DTPREL16_HA:
  case R_PPC_DTPREL16_HI:
  case R_PPC_DTPREL16_LO:
    relocate(loc, rel, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
  }
}

void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_PPC_GOT_TPREL16: {
    // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha
    uint32_t rt = readFromHalf16(loc) & 0x03e00000;
    writeFromHalf16(loc, 0x3c020000 | rt | ha(val));
    break;
  }
  case R_PPC_TLS: {
    uint32_t insn = read32(loc);
    if (insn >> 26 != 31)
      error("unrecognized instruction for IE to LE R_PPC_TLS");
    // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l
    uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1);
    if (dFormOp == 0)
      error("unrecognized instruction for IE to LE R_PPC_TLS");
    write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val));
    break;
  }
  default:
    llvm_unreachable("unsupported relocation for TLS IE to LE relaxation");
  }
}

TargetInfo *elf::getPPCTargetInfo() {
  static PPC target;
  return &target;
}