Skip to content

Commit 776476c

Browse files
authored Nov 23, 2024
Reapply "[MemProf] Use radix tree for alloc contexts in bitcode summaries" (llvm#117395) (llvm#117404)
This reverts commit fdb050a and restores ccb4702, with a fix for the build bot failures. Specifically, it adds ProfileData to the dependencies of the BitWriter library; the missing dependency was causing shared library builds of LLVM to fail. Reproduced the failure with a shared library build and confirmed that this change fixes that build failure.
1 parent aa5dc53 commit 776476c

File tree

8 files changed

+244
-30
lines changed

8 files changed

+244
-30
lines changed
 

‎llvm/include/llvm/Bitcode/LLVMBitCodes.h

+7-3
Original file line number | Diff line number | Diff line change
@@ -307,12 +307,12 @@ enum GlobalValueSummarySymtabCodes {
307307
// [valueid, n x stackidindex]
308308
FS_PERMODULE_CALLSITE_INFO = 26,
309309
// Summary of per-module allocation memprof metadata.
310-
// [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
310+
// [nummib, nummib x (alloc type, context radix tree index),
311311
// [nummib x (numcontext x total size)]?]
312312
FS_PERMODULE_ALLOC_INFO = 27,
313313
// Summary of combined index memprof callsite metadata.
314-
// [valueid, numstackindices, numver,
315-
// numstackindices x stackidindex, numver x version]
314+
// [valueid, context radix tree index, numver,
315+
// numver x version]
316316
FS_COMBINED_CALLSITE_INFO = 28,
317317
// Summary of combined index allocation memprof metadata.
318318
// [nummib, numver,
@@ -331,6 +331,10 @@ enum GlobalValueSummarySymtabCodes {
331331
// the entries must be in the exact same order as the corresponding sizes.
332332
// [nummib x (numcontext x full stack id)]
333333
FS_ALLOC_CONTEXT_IDS = 31,
334+
// Linearized radix tree of allocation contexts. See the description above the
335+
// CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
336+
// [n x entry]
337+
FS_CONTEXT_RADIX_TREE_ARRAY = 32,
334338
};
335339

336340
enum MetadataCodes {

‎llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -329,6 +329,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
329329
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
330330
STRINGIFY_CODE(FS, STACK_IDS)
331331
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
332+
STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
332333
}
333334
case bitc::METADATA_ATTACHMENT_ID:
334335
switch (CodeID) {

‎llvm/lib/Bitcode/Reader/BitcodeReader.cpp

+55-16
Original file line number | Diff line number | Diff line change
@@ -987,6 +987,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
987987
/// ids from the lists in the callsite and alloc entries to the index.
988988
std::vector<uint64_t> StackIds;
989989

990+
/// Linearized radix tree of allocation contexts. See the description above
991+
/// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
992+
std::vector<uint64_t> RadixArray;
993+
990994
public:
991995
ModuleSummaryIndexBitcodeReader(
992996
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -1013,6 +1017,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
10131017
TypeIdCompatibleVtableInfo &TypeId);
10141018
std::vector<FunctionSummary::ParamAccess>
10151019
parseParamAccesses(ArrayRef<uint64_t> Record);
1020+
SmallVector<unsigned> parseAllocInfoContext(ArrayRef<uint64_t> Record,
1021+
unsigned &I);
10161022

10171023
template <bool AllowNullValueInfo = false>
10181024
std::pair<ValueInfo, GlobalValue::GUID>
@@ -7544,6 +7550,48 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
75447550
parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
75457551
}
75467552

7553+
// Decodes one allocation context from Record, starting at Record[I], and
// returns it as a list of the values produced by
// TheIndex.addOrGetStackIdIndex() for each stack id in the context.
// I is passed by reference and is advanced past every Record entry consumed:
// the count plus the inline stack id indexes in the legacy layout, or only the
// single radix tree index when a radix tree array has been read.
SmallVector<unsigned> ModuleSummaryIndexBitcodeReader::parseAllocInfoContext(
7554+
ArrayRef<uint64_t> Record, unsigned &I) {
7555+
SmallVector<unsigned> StackIdList;
7556+
// For backwards compatibility with old format before radix tree was
7557+
// used, simply see if we found a radix tree array record (and thus if
7558+
// the RadixArray is non-empty).
7559+
if (RadixArray.empty()) {
7560+
// Legacy layout: [numstackids, numstackids x stackidindex] inline in Record.
unsigned NumStackEntries = Record[I++];
7561+
assert(Record.size() - I >= NumStackEntries);
7562+
StackIdList.reserve(NumStackEntries);
7563+
for (unsigned J = 0; J < NumStackEntries; J++) {
7564+
assert(Record[I] < StackIds.size());
7565+
StackIdList.push_back(
7566+
TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
7567+
}
7568+
} else {
7569+
// Radix layout: Record holds a single index into RadixArray.
unsigned RadixIndex = Record[I++];
7570+
// See the comments above CallStackRadixTreeBuilder in ProfileData/MemProf.h
7571+
// for a detailed description of the radix tree array format. Briefly, the
7572+
// first entry will be the number of frames, any negative values are the
7573+
// negative of the offset of the next frame, and otherwise the frames are in
7574+
// increasing linear order.
7575+
assert(RadixIndex < RadixArray.size());
7576+
unsigned NumStackIds = RadixArray[RadixIndex++];
7577+
StackIdList.reserve(NumStackIds);
7578+
while (NumStackIds--) {
7579+
assert(RadixIndex < RadixArray.size());
7580+
// The 64-bit array entry is narrowed to unsigned here; the sign test below
// reinterprets that narrowed value as signed.
unsigned Elem = RadixArray[RadixIndex];
7581+
if (static_cast<std::make_signed_t<unsigned>>(Elem) < 0) {
7582+
// Elem is a negative value held in an unsigned, so this modular
// subtraction moves RadixIndex forward by the magnitude of the offset.
RadixIndex = RadixIndex - Elem;
7583+
assert(RadixIndex < RadixArray.size());
7584+
Elem = RadixArray[RadixIndex];
7585+
// We shouldn't encounter a second offset in a row.
7586+
assert(static_cast<std::make_signed_t<unsigned>>(Elem) >= 0);
7587+
}
7588+
RadixIndex++;
7589+
// Elem is used as an index into StackIds; note that, unlike the legacy
// path above, no assert guards this index.
StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[Elem]));
7590+
}
7591+
}
7592+
return StackIdList;
7593+
}
7594+
75477595
static void setSpecialRefs(SmallVectorImpl<ValueInfo> &Refs, unsigned ROCnt,
75487596
unsigned WOCnt) {
75497597
// Readonly and writeonly refs are in the end of the refs list.
@@ -8010,6 +8058,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
80108058
break;
80118059
}
80128060

8061+
case bitc::FS_CONTEXT_RADIX_TREE_ARRAY: { // [n x entry]
8062+
RadixArray = ArrayRef<uint64_t>(Record);
8063+
break;
8064+
}
8065+
80138066
case bitc::FS_PERMODULE_CALLSITE_INFO: {
80148067
unsigned ValueID = Record[0];
80158068
SmallVector<unsigned> StackIdList;
@@ -8065,14 +8118,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
80658118
(Version < 10 && I < Record.size())) {
80668119
assert(Record.size() - I >= 2);
80678120
AllocationType AllocType = (AllocationType)Record[I++];
8068-
unsigned NumStackEntries = Record[I++];
8069-
assert(Record.size() - I >= NumStackEntries);
8070-
SmallVector<unsigned> StackIdList;
8071-
for (unsigned J = 0; J < NumStackEntries; J++) {
8072-
assert(Record[I] < StackIds.size());
8073-
StackIdList.push_back(
8074-
TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
8075-
}
8121+
auto StackIdList = parseAllocInfoContext(Record, I);
80768122
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
80778123
}
80788124
// We either have nothing left or at least NumMIBs context size info
@@ -8123,14 +8169,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
81238169
while (MIBsRead++ < NumMIBs) {
81248170
assert(Record.size() - I >= 2);
81258171
AllocationType AllocType = (AllocationType)Record[I++];
8126-
unsigned NumStackEntries = Record[I++];
8127-
assert(Record.size() - I >= NumStackEntries);
8128-
SmallVector<unsigned> StackIdList;
8129-
for (unsigned J = 0; J < NumStackEntries; J++) {
8130-
assert(Record[I] < StackIds.size());
8131-
StackIdList.push_back(
8132-
TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
8133-
}
8172+
auto StackIdList = parseAllocInfoContext(Record, I);
81348173
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
81358174
}
81368175
assert(Record.size() - I >= NumVersions);

‎llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+147-11
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@
6060
#include "llvm/MC/StringTableBuilder.h"
6161
#include "llvm/MC/TargetRegistry.h"
6262
#include "llvm/Object/IRSymtab.h"
63+
#include "llvm/ProfileData/MemProf.h"
6364
#include "llvm/Support/AtomicOrdering.h"
6465
#include "llvm/Support/Casting.h"
6566
#include "llvm/Support/CommandLine.h"
@@ -83,6 +84,7 @@
8384
#include <vector>
8485

8586
using namespace llvm;
87+
using namespace llvm::memprof;
8688

8789
static cl::opt<unsigned>
8890
IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
231233
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
232234
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
233235
unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
234-
const Function &F);
236+
const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
237+
CallStackId &CallStackCount);
235238
void writeModuleLevelReferences(const GlobalVariable &V,
236239
SmallVector<uint64_t, 64> &NameVals,
237240
unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
41954198
}
41964199
}
41974200

4201+
// Adds the allocation contexts to the CallStacks map. We simply use the
4202+
// size at the time the context was added as the CallStackId. This works because
4203+
// when we look up the call stacks later on we process the function summaries
4204+
// and their allocation records in the same exact order.
//
// FS is the function summary whose allocs() and their MIBs are walked.
// GetStackIndex is applied to every entry of each MIB's StackIdIndices, and
// its result is what gets stored in the collected context.
// CallStacks receives one inserted entry per MIB visited.
4205+
static void collectMemProfCallStacks(
4206+
FunctionSummary *FS, std::function<LinearFrameId(unsigned)> GetStackIndex,
4207+
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
4208+
// The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
4209+
// id offset into the index of the full stack frames. The ModuleSummaryIndex
4210+
// currently uses unsigned. Make sure these stay in sync.
4211+
static_assert(std::is_same_v<LinearFrameId, unsigned>);
4212+
for (auto &AI : FS->allocs()) {
4213+
for (auto &MIB : AI.MIBs) {
4214+
// Translate this MIB's stack id indices through GetStackIndex into a
// fresh vector, sized up front to avoid regrowth.
SmallVector<unsigned> StackIdIndices;
4215+
StackIdIndices.reserve(MIB.StackIdIndices.size());
4216+
for (auto Id : MIB.StackIdIndices)
4217+
StackIdIndices.push_back(GetStackIndex(Id));
4218+
// The CallStackId is the size at the time this context was inserted.
4219+
CallStacks.insert({CallStacks.size(), StackIdIndices});
4220+
}
4221+
}
4222+
}
4223+
4224+
// Build the radix tree from the accumulated CallStacks, write out the resulting
4225+
// linearized radix tree array, and return the map of call stack positions into
4226+
// this array for use when writing the allocation records. The returned map is
4227+
// indexed by a CallStackId which in this case is implicitly determined by the
4228+
// order of function summaries and their allocation infos being written.
//
// CallStacks must be non-empty (asserted below) and is moved into the builder.
// Stream receives a single FS_CONTEXT_RADIX_TREE_ARRAY record, emitted with
// the abbreviation id RadixAbbrev.
4229+
static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
4230+
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &&CallStacks,
4231+
BitstreamWriter &Stream, unsigned RadixAbbrev) {
4232+
assert(!CallStacks.empty());
4233+
// The histogram is computed from CallStacks before it is moved into
// Builder.build() below.
DenseMap<unsigned, FrameStat> FrameHistogram =
4234+
computeFrameHistogram<LinearFrameId>(CallStacks);
4235+
CallStackRadixTreeBuilder<LinearFrameId> Builder;
4236+
// We don't need a MemProfFrameIndexes map as we have already converted the
4237+
// full stack id hash to a linear offset into the StackIds array.
4238+
Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
4239+
FrameHistogram);
4240+
Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
4241+
RadixAbbrev);
4242+
// Hand the builder's position map to the caller.
return Builder.takeCallStackPos();
4243+
}
4244+
41984245
static void writeFunctionHeapProfileRecords(
41994246
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
42004247
unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
42014248
std::function<unsigned(const ValueInfo &VI)> GetValueID,
42024249
std::function<unsigned(unsigned)> GetStackIndex,
4203-
bool WriteContextSizeInfoIndex) {
4250+
bool WriteContextSizeInfoIndex,
4251+
DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4252+
CallStackId &CallStackCount) {
42044253
SmallVector<uint64_t> Record;
42054254

42064255
for (auto &CI : FS->callsites()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
42344283
Record.push_back(AI.Versions.size());
42354284
for (auto &MIB : AI.MIBs) {
42364285
Record.push_back((uint8_t)MIB.AllocType);
4237-
Record.push_back(MIB.StackIdIndices.size());
4238-
for (auto Id : MIB.StackIdIndices)
4239-
Record.push_back(GetStackIndex(Id));
4286+
// Record the index into the radix tree array for this context.
4287+
assert(CallStackCount <= CallStackPos.size());
4288+
Record.push_back(CallStackPos[CallStackCount++]);
42404289
}
42414290
if (!PerModule) {
42424291
for (auto V : AI.Versions)
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
42824331
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
42834332
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
42844333
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
4285-
unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
4334+
unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
4335+
DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4336+
CallStackId &CallStackCount) {
42864337
NameVals.push_back(ValueID);
42874338

42884339
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
42974348
/*PerModule*/ true,
42984349
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
42994350
/*GetStackIndex*/ [&](unsigned I) { return I; },
4300-
/*WriteContextSizeInfoIndex*/ true);
4351+
/*WriteContextSizeInfoIndex*/ true, CallStackPos, CallStackCount);
43014352

43024353
auto SpecialRefCnts = FS->specialRefCounts();
43034354
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -4530,12 +4581,54 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
45304581
Abbv = std::make_shared<BitCodeAbbrev>();
45314582
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
45324583
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
4533-
// n x (alloc type, numstackids, numstackids x stackidindex)
4584+
// n x (alloc type, context radix tree index)
45344585
// optional: nummib x (numcontext x total size)
45354586
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
45364587
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
45374588
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
45384589

4590+
Abbv = std::make_shared<BitCodeAbbrev>();
4591+
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4592+
// n x entry
4593+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4594+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
4595+
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
4596+
4597+
// First walk through all the functions and collect the allocation contexts in
4598+
// their associated summaries, for use in constructing a radix tree of
4599+
// contexts. Note that we need to do this in the same order as the functions
4600+
// are processed further below since the call stack positions in the resulting
4601+
// radix tree array are identified based on this order.
4602+
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4603+
for (const Function &F : M) {
4604+
// Summary emission does not support anonymous functions, they have to be
4605+
// renamed using the anonymous function renaming pass.
4606+
if (!F.hasName())
4607+
report_fatal_error("Unexpected anonymous function when writing summary");
4608+
4609+
ValueInfo VI = Index->getValueInfo(F.getGUID());
4610+
if (!VI || VI.getSummaryList().empty()) {
4611+
// Only declarations should not have a summary (a declaration might
4612+
// however have a summary if the def was in module level asm).
4613+
assert(F.isDeclaration());
4614+
continue;
4615+
}
4616+
auto *Summary = VI.getSummaryList()[0].get();
4617+
FunctionSummary *FS = cast<FunctionSummary>(Summary);
4618+
collectMemProfCallStacks(
4619+
FS, /*GetStackIndex*/ [](unsigned I) { return I; }, CallStacks);
4620+
}
4621+
// Finalize the radix tree, write it out, and get the map of positions in the
4622+
// linearized tree array.
4623+
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4624+
if (!CallStacks.empty()) {
4625+
CallStackPos =
4626+
writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
4627+
}
4628+
4629+
// Keep track of the current index into the CallStackPos map.
4630+
CallStackId CallStackCount = 0;
4631+
45394632
SmallVector<uint64_t, 64> NameVals;
45404633
// Iterate over the list of functions instead of the Index to
45414634
// ensure the ordering is stable.
@@ -4555,7 +4648,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
45554648
auto *Summary = VI.getSummaryList()[0].get();
45564649
writePerModuleFunctionSummaryRecord(
45574650
NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
4558-
FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
4651+
FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
4652+
CallStackPos, CallStackCount);
45594653
}
45604654

45614655
// Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4786,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
46924786
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
46934787
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
46944788
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
4695-
// nummib x (alloc type, numstackids, numstackids x stackidindex),
4789+
// nummib x (alloc type, context radix tree index),
46964790
// numver x version
46974791
// optional: nummib x total size
46984792
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
46994793
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
47004794
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
47014795

4796+
Abbv = std::make_shared<BitCodeAbbrev>();
4797+
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4798+
// n x entry
4799+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4800+
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
4801+
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
4802+
47024803
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
47034804
if (DecSummaries == nullptr)
47044805
return false;
@@ -4735,6 +4836,41 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
47354836
NameVals.clear();
47364837
};
47374838

4839+
// First walk through all the functions and collect the allocation contexts in
4840+
// their associated summaries, for use in constructing a radix tree of
4841+
// contexts. Note that we need to do this in the same order as the functions
4842+
// are processed further below since the call stack positions in the resulting
4843+
// radix tree array are identified based on this order.
4844+
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4845+
forEachSummary([&](GVInfo I, bool IsAliasee) {
4846+
GlobalValueSummary *S = I.second;
4847+
assert(S);
4848+
auto *FS = dyn_cast<FunctionSummary>(S);
4849+
if (!FS)
4850+
return;
4851+
collectMemProfCallStacks(
4852+
FS,
4853+
/*GetStackIndex*/
4854+
[&](unsigned I) {
4855+
// Get the corresponding index into the list of StackIds actually
4856+
// being written for this combined index (which may be a subset in
4857+
// the case of distributed indexes).
4858+
assert(StackIdIndicesToIndex.contains(I));
4859+
return StackIdIndicesToIndex[I];
4860+
},
4861+
CallStacks);
4862+
});
4863+
// Finalize the radix tree, write it out, and get the map of positions in the
4864+
// linearized tree array.
4865+
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4866+
if (!CallStacks.empty()) {
4867+
CallStackPos =
4868+
writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
4869+
}
4870+
4871+
// Keep track of the current index into the CallStackPos map.
4872+
CallStackId CallStackCount = 0;
4873+
47384874
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
47394875
forEachSummary([&](GVInfo I, bool IsAliasee) {
47404876
GlobalValueSummary *S = I.second;
@@ -4813,7 +4949,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
48134949
assert(StackIdIndicesToIndex.contains(I));
48144950
return StackIdIndicesToIndex[I];
48154951
},
4816-
/*WriteContextSizeInfoIndex*/ false);
4952+
/*WriteContextSizeInfoIndex*/ false, CallStackPos, CallStackCount);
48174953

48184954
NameVals.push_back(*ValueId);
48194955
assert(ModuleIdMap.count(FS->modulePath()));

‎llvm/lib/Bitcode/Writer/CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMBitWriter
1212
Core
1313
MC
1414
Object
15+
ProfileData
1516
Support
1617
TargetParser
1718
)

‎llvm/lib/ProfileData/MemProf.cpp

+5
Original file line number | Diff line number | Diff line change
@@ -510,6 +510,7 @@ void CallStackRadixTreeBuilder<FrameIdTy>::build(
510510

511511
// Explicitly instantiate class with the utilized FrameIdTy.
512512
template class CallStackRadixTreeBuilder<FrameId>;
513+
template class CallStackRadixTreeBuilder<LinearFrameId>;
513514

514515
template <typename FrameIdTy>
515516
llvm::DenseMap<FrameIdTy, FrameStat>
@@ -532,6 +533,10 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
532533
template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
533534
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
534535
&MemProfCallStackData);
536+
template llvm::DenseMap<LinearFrameId, FrameStat>
537+
computeFrameHistogram<LinearFrameId>(
538+
llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
539+
&MemProfCallStackData);
535540

536541
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
537542
for (const auto &AS : Record.AllocSites) {
Binary file not shown.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
;; Check that we can read the old *_ALLOC_INFO summary format that placed the
2+
;; stack id indexes directly in the alloc info summary, rather than encoding as
3+
;; a separate radix tree.
4+
;;
5+
;; The old bitcode was generated by the older compiler from `opt -thinlto-bc`
6+
;; on the following LLVM assembly:
7+
;;
8+
;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
9+
;; target triple = "x86_64-unknown-linux-gnu"
10+
;;
11+
;; define internal ptr @_Z3barv() #0 {
12+
;; entry:
13+
;; %call = call ptr @_Znam(i64 0), !memprof !1, !callsite !6
14+
;; ret ptr null
15+
;; }
16+
;;
17+
;; declare ptr @_Znam(i64)
18+
;;
19+
;; !1 = !{!2, !4}
20+
;; !2 = !{!3, !"notcold"}
21+
;; !3 = !{i64 9086428284934609951, i64 8632435727821051414}
22+
;; !4 = !{!5, !"cold"}
23+
;; !5 = !{i64 9086428284934609951, i64 2732490490862098848}
24+
;; !6 = !{i64 9086428284934609951}
25+
26+
; RUN: llvm-dis %S/Inputs/memprof-old-alloc-context-summary.bc -o - | FileCheck %s
27+
; CHECK: stackIds: (8632435727821051414)
28+
; CHECK-SAME: stackIds: (2732490490862098848)

0 commit comments

Comments
 (0)
Please sign in to comment.