diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index 2e49f417a20..791d96ee92a 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -326,41 +326,38 @@ void SEHTableChunk::writeTo(uint8_t *Buf) const { // usually loaded to that address. However, if there's already another // DLL that overlaps, the loader has to relocate it. To do that, DLLs // contain .reloc sections which contain offsets that need to be fixed -// up at runtime. If the loader find that a DLL cannot be loaded to its +// up at runtime. If the loader finds that a DLL cannot be loaded to its // desired base address, it loads it to somewhere else, and add - to each offset that is -// specified by .reloc section. +// specified by the .reloc section. In ELF terms, .reloc sections +// contain relative relocations in REL format (as opposed to RELA.) // -// In ELF terms, .reloc sections contain arrays of relocation offsets. -// All these offsets in the section are implicitly R_*_RELATIVE, and -// addends are read from section contents (so it is REL as opposed to -// RELA). +// This already significantly reduces the size of relocations compared +// to ELF .rel.dyn, but Windows does more to reduce it (probably because +// it was invented for PCs in the late '80s or early '90s.) Offsets in +// .reloc are grouped by page where the page size is 12 bits, and +// offsets sharing the same page address are stored consecutively to +// represent them with less space. This is very similar to the page +// table which is grouped by (multiple stages of) pages. // -// This already reduce the size of relocations to 1/3 compared to ELF -// .dynrel, but Windows does more to reduce it (probably because it was -// invented for PCs in the late '80s or early '90s.) Offsets in .reloc -// are grouped by page where page size is 16 bits, and offsets sharing -// the same page address are stored consecutively to represent them with -// less space. This is a very similar to the page table which is grouped -// by (multiple stages of) pages. -// -// For example, let's say we have 0x00030, 0x00500, 0x01000, 0x01100, -// 0x20004, and 0x20008 in a .reloc section. In the section, they are -// represented like this: +// For example, let's say we have 0x00030, 0x00500, 0x00700, 0x00A00, +// 0x20004, and 0x20008 in a .reloc section for x64. The uppermost 4 +// bits have a type IMAGE_REL_BASED_DIR64 or 0xA. In the section, they +// are represented like this: // // 0x00000 -- page address (4 bytes) // 16 -- size of this block (4 bytes) -// 0x0030 -- entries (2 bytes each) -// 0x0500 -// 0x1000 -// 0x1100 +// 0xA030 -- entries (2 bytes each) +// 0xA500 +// 0xA700 +// 0xAA00 // 0x20000 -- page address (4 bytes) // 12 -- size of this block (4 bytes) -// 0x0004 -- entries (2 bytes each) -// 0x0008 +// 0xA004 -- entries (2 bytes each) +// 0xA008 // -// Usually we have a lot of relocatinos for each page, so the number of -// bytes for one .reloc entry is close to 2 bytes. +// Usually we have a lot of relocations for each page, so the number of +// bytes for one .reloc entry is close to 2 bytes on average. BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { // Block header consists of 4 byte page RVA and 4 byte block size. // Each entry is 2 byte. Last entry may be padding. diff --git a/COFF/Error.cpp b/COFF/Error.cpp index b2c7c89bd36..166b1971e77 100644 --- a/COFF/Error.cpp +++ b/COFF/Error.cpp @@ -59,6 +59,7 @@ void log(const Twine &Msg) { if (Config->Verbose) { std::lock_guard Lock(Mu); outs() << Argv0 << ": " << Msg << "\n"; + outs().flush(); } } diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp index 19468c0fac5..fe59de6efa5 100644 --- a/COFF/ICF.cpp +++ b/COFF/ICF.cpp @@ -71,10 +71,18 @@ uint32_t ICF::getHash(SectionChunk *C) { } // Returns true if section S is subject of ICF. +// +// Microsoft's documentation +// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April +// 2017) says that /opt:icf folds both functions and read-only data. +// Despite that, the MSVC linker folds only functions. We found +// a few instances of programs that are not safe for data merging. +// Therefore, we merge only functions just like the MSVC tool. bool ICF::isEligible(SectionChunk *C) { bool Global = C->Sym && C->Sym->isExternal(); + bool Executable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE; bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; - return C->isCOMDAT() && C->isLive() && Global && !Writable; + return C->isCOMDAT() && C->isLive() && Global && Executable && !Writable; } // Split a range into smaller ranges by recoloring sections diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp index cb56e13014d..df3b6a032cf 100644 --- a/COFF/InputFiles.cpp +++ b/COFF/InputFiles.cpp @@ -327,6 +327,9 @@ void ImportFile::parse() { ImpSym = cast( Symtab->addImportData(ImpName, this)->body()); + if (Hdr->getType() == llvm::COFF::IMPORT_CONST) + ConstSym = + cast(Symtab->addImportData(Name, this)->body()); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h index 3078de68752..a2fd3f59ad7 100644 --- a/COFF/InputFiles.h +++ b/COFF/InputFiles.h @@ -167,6 +167,7 @@ public: static bool classof(const InputFile *F) { return F->kind() == ImportKind; } DefinedImportData *ImpSym = nullptr; + DefinedImportData *ConstSym = nullptr; DefinedImportThunk *ThunkSym = nullptr; std::string DLLName; diff --git a/COFF/MapFile.cpp b/COFF/MapFile.cpp index 43dd8dc3581..4e596e602fe 100644 --- a/COFF/MapFile.cpp +++ b/COFF/MapFile.cpp @@ -11,21 +11,21 @@ // hierarchically the output sections, input sections, input files and // symbol: // -// Address Size Align Out In File Symbol -// ================================================================= -// 00201000 00000015 4 .text -// 00201000 0000000e 4 .text -// 00201000 0000000e 4 test.o -// 0020100e 00000000 0 local -// 00201005 00000000 0 f(int) +// Address Size Align Out File Symbol +// 00201000 00000015 4 .text +// 00201000 0000000e 4 test.o:(.text) +// 0020100e 00000000 0 local +// 00201005 00000000 0 f(int) // //===----------------------------------------------------------------------===// #include "MapFile.h" #include "Error.h" +#include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" +#include "lld/Core/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -34,72 +34,58 @@ using namespace llvm::object; using namespace lld; using namespace lld::coff; -static void writeOutSecLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - OS << format("%08llx %08llx %5lld ", Address, Size, Align) - << left_justify(Name, 7); +typedef DenseMap> + SymbolMapTy; + +// Print out the first three columns of a line. +static void writeHeader(raw_ostream &OS, uint64_t Addr, uint64_t Size, + uint64_t Align) { + OS << format("%08llx %08llx %5lld ", Addr, Size, Align); } -static void writeInSecLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeOutSecLine(OS, Address, Size, Align, ""); - OS << ' ' << left_justify(Name, 7); +static std::string indent(int Depth) { return std::string(Depth * 8, ' '); } + +// Returns a list of all symbols that we want to print out. +static std::vector getSymbols() { + std::vector V; + for (coff::ObjectFile *File : Symtab->ObjectFiles) + for (SymbolBody *B : File->getSymbols()) + if (auto *Sym = dyn_cast(B)) + if (Sym && !Sym->getCOFFSymbol().isSectionDefinition()) + V.push_back(Sym); + return V; } -static void writeFileLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - uint64_t Align, StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeInSecLine(OS, Address, Size, Align, ""); - OS << ' ' << left_justify(Name, 7); -} +// Returns a map from sections to their symbols. +static SymbolMapTy getSectionSyms(ArrayRef Syms) { + SymbolMapTy Ret; + for (DefinedRegular *S : Syms) + Ret[S->getChunk()].push_back(S); -static void writeSymbolLine(raw_fd_ostream &OS, uint64_t Address, uint64_t Size, - StringRef Name) { - // Pass an empty name to align the text to the correct column. - writeFileLine(OS, Address, Size, 0, ""); - OS << ' ' << left_justify(Name, 7); -} - -static void writeSectionChunk(raw_fd_ostream &OS, const SectionChunk *SC, - StringRef &PrevName) { - StringRef Name = SC->getSectionName(); - if (Name != PrevName) { - writeInSecLine(OS, SC->getRVA(), SC->getSize(), SC->getAlign(), Name); - OS << '\n'; - PrevName = Name; - } - coff::ObjectFile *File = SC->File; - if (!File) - return; - writeFileLine(OS, SC->getRVA(), SC->getSize(), SC->getAlign(), - toString(File)); - OS << '\n'; - ArrayRef Syms = File->getSymbols(); - for (SymbolBody *Sym : Syms) { - auto *DR = dyn_cast(Sym); - if (!DR || DR->getChunk() != SC || - DR->getCOFFSymbol().isSectionDefinition()) - continue; - writeSymbolLine(OS, DR->getRVA(), SC->getSize(), toString(*Sym)); - OS << '\n'; + // Sort symbols by address. + for (auto &It : Ret) { + SmallVectorImpl &V = It.second; + std::sort(V.begin(), V.end(), [](DefinedRegular *A, DefinedRegular *B) { + return A->getRVA() < B->getRVA(); + }); } + return Ret; } -static void writeMapFile2(raw_fd_ostream &OS, - ArrayRef OutputSections) { - OS << "Address Size Align Out In File Symbol\n"; +// Construct a map from symbols to their stringified representations. +static DenseMap +getSymbolStrings(ArrayRef Syms) { + std::vector Str(Syms.size()); + parallel_for((size_t)0, Syms.size(), [&](size_t I) { + raw_string_ostream OS(Str[I]); + writeHeader(OS, Syms[I]->getRVA(), 0, 0); + OS << indent(2) << toString(*Syms[I]); + }); - for (OutputSection *Sec : OutputSections) { - uint32_t VA = Sec->getRVA(); - writeOutSecLine(OS, VA, Sec->getVirtualSize(), /*Align=*/PageSize, - Sec->getName()); - OS << '\n'; - StringRef PrevName = ""; - for (Chunk *C : Sec->getChunks()) - if (const auto *SC = dyn_cast(C)) - writeSectionChunk(OS, SC, PrevName); - } + DenseMap Ret; + for (size_t I = 0, E = Syms.size(); I < E; ++I) + Ret[Syms[I]] = std::move(Str[I]); + return Ret; } void coff::writeMapFile(ArrayRef OutputSections) { @@ -110,5 +96,30 @@ void coff::writeMapFile(ArrayRef OutputSections) { raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None); if (EC) fatal("cannot open " + Config->MapFile + ": " + EC.message()); - writeMapFile2(OS, OutputSections); + + // Collect symbol info that we want to print out. + std::vector Syms = getSymbols(); + SymbolMapTy SectionSyms = getSectionSyms(Syms); + DenseMap SymStr = getSymbolStrings(Syms); + + // Print out the header line. + OS << "Address Size Align Out In Symbol\n"; + + // Print out file contents. + for (OutputSection *Sec : OutputSections) { + writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize); + OS << Sec->getName() << '\n'; + + for (Chunk *C : Sec->getChunks()) { + auto *SC = dyn_cast(C); + if (!SC) + continue; + + writeHeader(OS, SC->getRVA(), SC->getSize(), SC->getAlign()); + OS << indent(1) << SC->File->getName() << ":(" << SC->getSectionName() + << ")\n"; + for (DefinedRegular *Sym : SectionSyms[SC]) + OS << SymStr[Sym] << '\n'; + } + } } diff --git a/ELF/Config.h b/ELF/Config.h index d25c63c3c0d..1ace4aa26fd 100644 --- a/ELF/Config.h +++ b/ELF/Config.h @@ -89,7 +89,7 @@ struct Configuration { llvm::StringRef SoName; llvm::StringRef Sysroot; llvm::StringRef ThinLTOCacheDir; - std::string RPath; + std::string Rpath; std::vector VersionDefinitions; std::vector AuxiliaryList; std::vector SearchPaths; diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp index 68eb5616a5c..6a71eb3ee49 100644 --- a/ELF/Driver.cpp +++ b/ELF/Driver.cpp @@ -124,7 +124,7 @@ static std::tuple parseEmulation(StringRef Emul) { // Returns slices of MB by parsing MB as an archive file. // Each slice consists of a member file in the archive. std::vector -LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { +static getArchiveMembers(MemoryBufferRef MB) { std::unique_ptr File = check(Archive::create(MB), MB.getBufferIdentifier() + ": failed to parse archive"); @@ -242,6 +242,9 @@ static void checkOptions(opt::InputArgList &Args) { if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); + if (!Config->Shared && !Config->AuxiliaryList.empty()) + error("-f may not be used without -shared"); + if (Config->Relocatable) { if (Config->Shared) error("-r and -shared may not be used together"); @@ -396,7 +399,7 @@ static std::vector getArgs(opt::InputArgList &Args, int Id) { return V; } -static std::string getRPath(opt::InputArgList &Args) { +static std::string getRpath(opt::InputArgList &Args) { std::vector V = getArgs(Args, OPT_rpath); return llvm::join(V.begin(), V.end(), ":"); } @@ -444,16 +447,14 @@ static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) { } static Target2Policy getTarget2(opt::InputArgList &Args) { - if (auto *Arg = Args.getLastArg(OPT_target2)) { - StringRef S = Arg->getValue(); - if (S == "rel") - return Target2Policy::Rel; - if (S == "abs") - return Target2Policy::Abs; - if (S == "got-rel") - return Target2Policy::GotRel; - error("unknown --target2 option: " + S); - } + StringRef S = getString(Args, OPT_target2, "got-rel"); + if (S == "rel") + return Target2Policy::Rel; + if (S == "abs") + return Target2Policy::Abs; + if (S == "got-rel") + return Target2Policy::GotRel; + error("unknown --target2 option: " + S); return Target2Policy::GotRel; } @@ -550,6 +551,29 @@ static std::pair getHashStyle(opt::InputArgList &Args) { return {true, true}; } +// Parse --build-id or --build-id=