60
60
#include " llvm/MC/StringTableBuilder.h"
61
61
#include " llvm/MC/TargetRegistry.h"
62
62
#include " llvm/Object/IRSymtab.h"
63
+ #include " llvm/ProfileData/MemProf.h"
63
64
#include " llvm/Support/AtomicOrdering.h"
64
65
#include " llvm/Support/Casting.h"
65
66
#include " llvm/Support/CommandLine.h"
83
84
#include < vector>
84
85
85
86
using namespace llvm ;
87
+ using namespace llvm ::memprof;
86
88
87
89
static cl::opt<unsigned >
88
90
IndexThreshold (" bitcode-mdindex-threshold" , cl::Hidden, cl::init(25 ),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
231
233
SmallVector<uint64_t , 64 > &NameVals, GlobalValueSummary *Summary,
232
234
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
233
235
unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
234
- const Function &F);
236
+ const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
237
+ CallStackId &CallStackCount);
235
238
void writeModuleLevelReferences (const GlobalVariable &V,
236
239
SmallVector<uint64_t , 64 > &NameVals,
237
240
unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
4195
4198
}
4196
4199
}
4197
4200
4201
+ // Adds the allocation contexts to the CallStacks map. We simply use the
4202
+ // size at the time the context was added as the CallStackId. This works because
4203
+ // when we look up the call stacks later on we process the function summaries
4204
+ // and their allocation records in the same exact order.
4205
+ static void collectMemProfCallStacks (
4206
+ FunctionSummary *FS, std::function<LinearFrameId(unsigned )> GetStackIndex,
4207
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
4208
+ // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
4209
+ // id offset into the index of the full stack frames. The ModuleSummaryIndex
4210
+ // currently uses unsigned. Make sure these stay in sync.
4211
+ static_assert (std::is_same_v<LinearFrameId, unsigned >);
4212
+ for (auto &AI : FS->allocs ()) {
4213
+ for (auto &MIB : AI.MIBs ) {
4214
+ SmallVector<unsigned > StackIdIndices;
4215
+ StackIdIndices.reserve (MIB.StackIdIndices .size ());
4216
+ for (auto Id : MIB.StackIdIndices )
4217
+ StackIdIndices.push_back (GetStackIndex (Id));
4218
+ // The CallStackId is the size at the time this context was inserted.
4219
+ CallStacks.insert ({CallStacks.size (), StackIdIndices});
4220
+ }
4221
+ }
4222
+ }
4223
+
4224
+ // Build the radix tree from the accumulated CallStacks, write out the resulting
4225
+ // linearized radix tree array, and return the map of call stack positions into
4226
+ // this array for use when writing the allocation records. The returned map is
4227
+ // indexed by a CallStackId which in this case is implicitly determined by the
4228
+ // order of function summaries and their allocation infos being written.
4229
+ static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree (
4230
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &&CallStacks,
4231
+ BitstreamWriter &Stream, unsigned RadixAbbrev) {
4232
+ assert (!CallStacks.empty ());
4233
+ DenseMap<unsigned , FrameStat> FrameHistogram =
4234
+ computeFrameHistogram<LinearFrameId>(CallStacks);
4235
+ CallStackRadixTreeBuilder<LinearFrameId> Builder;
4236
+ // We don't need a MemProfFrameIndexes map as we have already converted the
4237
+ // full stack id hash to a linear offset into the StackIds array.
4238
+ Builder.build (std::move (CallStacks), /* MemProfFrameIndexes=*/ std::nullopt,
4239
+ FrameHistogram);
4240
+ Stream.EmitRecord (bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray (),
4241
+ RadixAbbrev);
4242
+ return Builder.takeCallStackPos ();
4243
+ }
4244
+
4198
4245
static void writeFunctionHeapProfileRecords (
4199
4246
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
4200
4247
unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
4201
4248
std::function<unsigned (const ValueInfo &VI)> GetValueID,
4202
4249
std::function<unsigned(unsigned )> GetStackIndex,
4203
- bool WriteContextSizeInfoIndex) {
4250
+ bool WriteContextSizeInfoIndex,
4251
+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4252
+ CallStackId &CallStackCount) {
4204
4253
SmallVector<uint64_t > Record;
4205
4254
4206
4255
for (auto &CI : FS->callsites ()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
4234
4283
Record.push_back (AI.Versions .size ());
4235
4284
for (auto &MIB : AI.MIBs ) {
4236
4285
Record.push_back ((uint8_t )MIB.AllocType );
4237
- Record. push_back (MIB. StackIdIndices . size ());
4238
- for ( auto Id : MIB. StackIdIndices )
4239
- Record.push_back (GetStackIndex (Id) );
4286
+ // Record the index into the radix tree array for this context.
4287
+ assert (CallStackCount <= CallStackPos. size ());
4288
+ Record.push_back (CallStackPos[CallStackCount++] );
4240
4289
}
4241
4290
if (!PerModule) {
4242
4291
for (auto V : AI.Versions )
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
4282
4331
SmallVector<uint64_t , 64 > &NameVals, GlobalValueSummary *Summary,
4283
4332
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
4284
4333
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
4285
- unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
4334
+ unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
4335
+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
4336
+ CallStackId &CallStackCount) {
4286
4337
NameVals.push_back (ValueID);
4287
4338
4288
4339
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
4297
4348
/* PerModule*/ true ,
4298
4349
/* GetValueId*/ [&](const ValueInfo &VI) { return getValueId (VI); },
4299
4350
/* GetStackIndex*/ [&](unsigned I) { return I; },
4300
- /* WriteContextSizeInfoIndex*/ true );
4351
+ /* WriteContextSizeInfoIndex*/ true , CallStackPos, CallStackCount );
4301
4352
4302
4353
auto SpecialRefCnts = FS->specialRefCounts ();
4303
4354
NameVals.push_back (getEncodedGVSummaryFlags (FS->flags ()));
@@ -4530,12 +4581,54 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
4530
4581
Abbv = std::make_shared<BitCodeAbbrev>();
4531
4582
Abbv->Add (BitCodeAbbrevOp (bitc::FS_PERMODULE_ALLOC_INFO));
4532
4583
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // nummib
4533
- // n x (alloc type, numstackids, numstackids x stackidindex )
4584
+ // n x (alloc type, context radix tree index )
4534
4585
// optional: nummib x (numcontext x total size)
4535
4586
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4536
4587
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4537
4588
unsigned AllocAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4538
4589
4590
+ Abbv = std::make_shared<BitCodeAbbrev>();
4591
+ Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4592
+ // n x entry
4593
+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4594
+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4595
+ unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4596
+
4597
+ // First walk through all the functions and collect the allocation contexts in
4598
+ // their associated summaries, for use in constructing a radix tree of
4599
+ // contexts. Note that we need to do this in the same order as the functions
4600
+ // are processed further below since the call stack positions in the resulting
4601
+ // radix tree array are identified based on this order.
4602
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4603
+ for (const Function &F : M) {
4604
+ // Summary emission does not support anonymous functions, they have to be
4605
+ // renamed using the anonymous function renaming pass.
4606
+ if (!F.hasName ())
4607
+ report_fatal_error (" Unexpected anonymous function when writing summary" );
4608
+
4609
+ ValueInfo VI = Index->getValueInfo (F.getGUID ());
4610
+ if (!VI || VI.getSummaryList ().empty ()) {
4611
+ // Only declarations should not have a summary (a declaration might
4612
+ // however have a summary if the def was in module level asm).
4613
+ assert (F.isDeclaration ());
4614
+ continue ;
4615
+ }
4616
+ auto *Summary = VI.getSummaryList ()[0 ].get ();
4617
+ FunctionSummary *FS = cast<FunctionSummary>(Summary);
4618
+ collectMemProfCallStacks (
4619
+ FS, /* GetStackIndex*/ [](unsigned I) { return I; }, CallStacks);
4620
+ }
4621
+ // Finalize the radix tree, write it out, and get the map of positions in the
4622
+ // linearized tree array.
4623
+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4624
+ if (!CallStacks.empty ()) {
4625
+ CallStackPos =
4626
+ writeMemoryProfileRadixTree (std::move (CallStacks), Stream, RadixAbbrev);
4627
+ }
4628
+
4629
+ // Keep track of the current index into the CallStackPos map.
4630
+ CallStackId CallStackCount = 0 ;
4631
+
4539
4632
SmallVector<uint64_t , 64 > NameVals;
4540
4633
// Iterate over the list of functions instead of the Index to
4541
4634
// ensure the ordering is stable.
@@ -4555,7 +4648,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
4555
4648
auto *Summary = VI.getSummaryList ()[0 ].get ();
4556
4649
writePerModuleFunctionSummaryRecord (
4557
4650
NameVals, Summary, VE.getValueID (&F), FSCallsRelBFAbbrev,
4558
- FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
4651
+ FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
4652
+ CallStackPos, CallStackCount);
4559
4653
}
4560
4654
4561
4655
// Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4786,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
4692
4786
Abbv->Add (BitCodeAbbrevOp (bitc::FS_COMBINED_ALLOC_INFO));
4693
4787
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // nummib
4694
4788
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 4 )); // numver
4695
- // nummib x (alloc type, numstackids, numstackids x stackidindex ),
4789
+ // nummib x (alloc type, context radix tree index ),
4696
4790
// numver x version
4697
4791
// optional: nummib x total size
4698
4792
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4699
4793
Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4700
4794
unsigned AllocAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4701
4795
4796
+ Abbv = std::make_shared<BitCodeAbbrev>();
4797
+ Abbv->Add (BitCodeAbbrevOp (bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
4798
+ // n x entry
4799
+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::Array));
4800
+ Abbv->Add (BitCodeAbbrevOp (BitCodeAbbrevOp::VBR, 8 ));
4801
+ unsigned RadixAbbrev = Stream.EmitAbbrev (std::move (Abbv));
4802
+
4702
4803
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
4703
4804
if (DecSummaries == nullptr )
4704
4805
return false ;
@@ -4735,6 +4836,41 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
4735
4836
NameVals.clear ();
4736
4837
};
4737
4838
4839
+ // First walk through all the functions and collect the allocation contexts in
4840
+ // their associated summaries, for use in constructing a radix tree of
4841
+ // contexts. Note that we need to do this in the same order as the functions
4842
+ // are processed further below since the call stack positions in the resulting
4843
+ // radix tree array are identified based on this order.
4844
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
4845
+ forEachSummary ([&](GVInfo I, bool IsAliasee) {
4846
+ GlobalValueSummary *S = I.second ;
4847
+ assert (S);
4848
+ auto *FS = dyn_cast<FunctionSummary>(S);
4849
+ if (!FS)
4850
+ return ;
4851
+ collectMemProfCallStacks (
4852
+ FS,
4853
+ /* GetStackIndex*/
4854
+ [&](unsigned I) {
4855
+ // Get the corresponding index into the list of StackIds actually
4856
+ // being written for this combined index (which may be a subset in
4857
+ // the case of distributed indexes).
4858
+ assert (StackIdIndicesToIndex.contains (I));
4859
+ return StackIdIndicesToIndex[I];
4860
+ },
4861
+ CallStacks);
4862
+ });
4863
+ // Finalize the radix tree, write it out, and get the map of positions in the
4864
+ // linearized tree array.
4865
+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
4866
+ if (!CallStacks.empty ()) {
4867
+ CallStackPos =
4868
+ writeMemoryProfileRadixTree (std::move (CallStacks), Stream, RadixAbbrev);
4869
+ }
4870
+
4871
+ // Keep track of the current index into the CallStackPos map.
4872
+ CallStackId CallStackCount = 0 ;
4873
+
4738
4874
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
4739
4875
forEachSummary ([&](GVInfo I, bool IsAliasee) {
4740
4876
GlobalValueSummary *S = I.second ;
@@ -4813,7 +4949,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
4813
4949
assert (StackIdIndicesToIndex.contains (I));
4814
4950
return StackIdIndicesToIndex[I];
4815
4951
},
4816
- /* WriteContextSizeInfoIndex*/ false );
4952
+ /* WriteContextSizeInfoIndex*/ false , CallStackPos, CallStackCount );
4817
4953
4818
4954
NameVals.push_back (*ValueId);
4819
4955
assert (ModuleIdMap.count (FS->modulePath ()));
0 commit comments