/*
|
* Copyright (C) 2017 The Android Open Source Project
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*/
|
|
#include "experimental.h"
|
#include "slicer/code_ir.h"
|
#include "slicer/control_flow_graph.h"
|
#include "slicer/dex_ir.h"
|
#include "slicer/dex_ir_builder.h"
|
#include "slicer/instrumentation.h"
|
|
#include <string.h>
|
#include <map>
|
#include <memory>
|
#include <vector>
|
|
namespace experimental {
|
|
// Rewrites every method through raising to code IR -> back to bytecode
|
// (also stress the CFG creation)
|
void FullRewrite(std::shared_ptr<ir::DexFile> dex_ir) {
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code != nullptr) {
|
lir::CodeIr code_ir(ir_method.get(), dex_ir);
|
lir::ControlFlowGraph cfg_compact(&code_ir, false);
|
lir::ControlFlowGraph cfg_verbose(&code_ir, true);
|
code_ir.Assemble();
|
}
|
}
|
}
|
|
// For every method body in the .dex image, replace invoke-virtual[/range]
|
// instances with a invoke-static[/range] to a fictitious Tracer.WrapInvoke(<args...>)
|
// WrapInvoke() is a static method which takes the same arguments as the
|
// original method plus an explicit "this" argument, and returns the same
|
// type as the original method.
|
void StressWrapInvoke(std::shared_ptr<ir::DexFile> dex_ir) {
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code == nullptr) {
|
continue;
|
}
|
|
lir::CodeIr code_ir(ir_method.get(), dex_ir);
|
ir::Builder builder(dex_ir);
|
|
// search for invoke-virtual[/range] bytecodes
|
//
|
// NOTE: we may be removing the current bytecode
|
// from the instructions list so we must use a
|
// different iteration style (it++ is done before
|
// handling *it, not after as in a normal iteration)
|
//
|
auto it = code_ir.instructions.begin();
|
while (it != code_ir.instructions.end()) {
|
auto instr = *it++;
|
auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
|
if (bytecode == nullptr) {
|
continue;
|
}
|
|
dex::Opcode new_call_opcode = dex::OP_NOP;
|
switch (bytecode->opcode) {
|
case dex::OP_INVOKE_VIRTUAL:
|
new_call_opcode = dex::OP_INVOKE_STATIC;
|
break;
|
case dex::OP_INVOKE_VIRTUAL_RANGE:
|
new_call_opcode = dex::OP_INVOKE_STATIC_RANGE;
|
break;
|
default:
|
// skip instruction ...
|
continue;
|
}
|
assert(new_call_opcode != dex::OP_NOP);
|
|
auto orig_method = bytecode->CastOperand<lir::Method>(1)->ir_method;
|
|
// construct the wrapper method declaration
|
std::vector<ir::Type*> param_types;
|
param_types.push_back(orig_method->parent);
|
if (orig_method->prototype->param_types != nullptr) {
|
const auto& orig_param_types = orig_method->prototype->param_types->types;
|
param_types.insert(param_types.end(), orig_param_types.begin(), orig_param_types.end());
|
}
|
|
auto ir_proto = builder.GetProto(orig_method->prototype->return_type,
|
builder.GetTypeList(param_types));
|
|
auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("WrapInvoke"),
|
ir_proto,
|
builder.GetType("LTracer;"));
|
|
auto wraper_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);
|
|
// new call bytecode
|
auto new_call = code_ir.Alloc<lir::Bytecode>();
|
new_call->opcode = new_call_opcode;
|
new_call->operands.push_back(bytecode->operands[0]);
|
new_call->operands.push_back(wraper_method);
|
code_ir.instructions.InsertBefore(bytecode, new_call);
|
|
// remove the old call bytecode
|
//
|
// NOTE: we can mutate the original bytecode directly
|
// since the instructions can't have multiple references
|
// in the code IR, but for testing purposes we'll do it
|
// the hard way here
|
//
|
code_ir.instructions.Remove(bytecode);
|
}
|
|
code_ir.Assemble();
|
}
|
}
|
|
// For every method in the .dex image, insert an "entry hook" call
|
// to a fictitious method: Tracer.OnEntry(<args...>). OnEntry() has the
|
// same argument types as the instrumented method plus an explicit
|
// "this" for non-static methods. On entry to the instumented method
|
// we'll call OnEntry() with the values of the incoming arguments.
|
//
|
// NOTE: the entry hook will forward all the incoming arguments
|
// so we need to define an Tracer.OnEntry overload for every method
|
// signature. This means that for very large .dex images, approaching
|
// the 64k method limit, we might not be able to allocate new method declarations.
|
// (which is ok, and a good test case, since this is a stress scenario)
|
//
|
void StressEntryHook(std::shared_ptr<ir::DexFile> dex_ir) {
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code == nullptr) {
|
continue;
|
}
|
|
lir::CodeIr code_ir(ir_method.get(), dex_ir);
|
ir::Builder builder(dex_ir);
|
|
// 1. construct call target
|
std::vector<ir::Type*> param_types;
|
if ((ir_method->access_flags & dex::kAccStatic) == 0) {
|
param_types.push_back(ir_method->decl->parent);
|
}
|
if (ir_method->decl->prototype->param_types != nullptr) {
|
const auto& orig_param_types = ir_method->decl->prototype->param_types->types;
|
param_types.insert(param_types.end(), orig_param_types.begin(), orig_param_types.end());
|
}
|
|
auto ir_proto = builder.GetProto(builder.GetType("V"),
|
builder.GetTypeList(param_types));
|
|
auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("OnEntry"),
|
ir_proto,
|
builder.GetType("LTracer;"));
|
|
auto target_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);
|
|
// 2. argument registers
|
auto regs = ir_method->code->registers;
|
auto args_count = ir_method->code->ins_count;
|
auto args = code_ir.Alloc<lir::VRegRange>(regs - args_count, args_count);
|
|
// 3. call bytecode
|
auto call = code_ir.Alloc<lir::Bytecode>();
|
call->opcode = dex::OP_INVOKE_STATIC_RANGE;
|
call->operands.push_back(args);
|
call->operands.push_back(target_method);
|
|
// 4. insert the hook before the first bytecode
|
for (auto instr : code_ir.instructions) {
|
auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
|
if (bytecode == nullptr) {
|
continue;
|
}
|
code_ir.instructions.InsertBefore(bytecode, call);
|
break;
|
}
|
|
code_ir.Assemble();
|
}
|
}
|
|
// For every method in the .dex image, insert an "exit hook" call
|
// to a fictitious method: Tracer.OnExit(<return value...>).
|
// OnExit() is called right before returning from the instrumented
|
// method (on the non-exceptional path) and it will be passed the return
|
// value, if any. For non-void return types, the return value from OnExit()
|
// will also be used as the return value of the instrumented method.
|
void StressExitHook(std::shared_ptr<ir::DexFile> dex_ir) {
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code == nullptr) {
|
continue;
|
}
|
|
lir::CodeIr code_ir(ir_method.get(), dex_ir);
|
ir::Builder builder(dex_ir);
|
|
// do we have a void-return method?
|
bool return_void =
|
::strcmp(ir_method->decl->prototype->return_type->descriptor->c_str(), "V") == 0;
|
|
// 1. construct call target
|
std::vector<ir::Type*> param_types;
|
if (!return_void) {
|
param_types.push_back(ir_method->decl->prototype->return_type);
|
}
|
|
auto ir_proto = builder.GetProto(ir_method->decl->prototype->return_type,
|
builder.GetTypeList(param_types));
|
|
auto ir_method_decl = builder.GetMethodDecl(builder.GetAsciiString("OnExit"),
|
ir_proto,
|
builder.GetType("LTracer;"));
|
|
auto target_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);
|
|
// 2. find and instrument the return instructions
|
for (auto instr : code_ir.instructions) {
|
auto bytecode = dynamic_cast<lir::Bytecode*>(instr);
|
if (bytecode == nullptr) {
|
continue;
|
}
|
|
dex::Opcode move_result_opcode = dex::OP_NOP;
|
dex::u4 reg = 0;
|
int reg_count = 0;
|
|
switch (bytecode->opcode) {
|
case dex::OP_RETURN_VOID:
|
SLICER_CHECK(return_void);
|
break;
|
case dex::OP_RETURN:
|
SLICER_CHECK(!return_void);
|
move_result_opcode = dex::OP_MOVE_RESULT;
|
reg = bytecode->CastOperand<lir::VReg>(0)->reg;
|
reg_count = 1;
|
break;
|
case dex::OP_RETURN_OBJECT:
|
SLICER_CHECK(!return_void);
|
move_result_opcode = dex::OP_MOVE_RESULT_OBJECT;
|
reg = bytecode->CastOperand<lir::VReg>(0)->reg;
|
reg_count = 1;
|
break;
|
case dex::OP_RETURN_WIDE:
|
SLICER_CHECK(!return_void);
|
move_result_opcode = dex::OP_MOVE_RESULT_WIDE;
|
reg = bytecode->CastOperand<lir::VRegPair>(0)->base_reg;
|
reg_count = 2;
|
break;
|
default:
|
// skip the bytecode...
|
continue;
|
}
|
|
// the call bytecode
|
auto args = code_ir.Alloc<lir::VRegRange>(reg, reg_count);
|
auto call = code_ir.Alloc<lir::Bytecode>();
|
call->opcode = dex::OP_INVOKE_STATIC_RANGE;
|
call->operands.push_back(args);
|
call->operands.push_back(target_method);
|
code_ir.instructions.InsertBefore(bytecode, call);
|
|
// move result back to the right register
|
//
|
// NOTE: we're reusing the original return's operand,
|
// which is valid and more efficient than allocating
|
// a new LIR node, but it's also fragile: we need to be
|
// very careful about mutating shared nodes.
|
//
|
if (move_result_opcode != dex::OP_NOP) {
|
auto move_result = code_ir.Alloc<lir::Bytecode>();
|
move_result->opcode = move_result_opcode;
|
move_result->operands.push_back(bytecode->operands[0]);
|
code_ir.instructions.InsertBefore(bytecode, move_result);
|
}
|
}
|
|
code_ir.Assemble();
|
}
|
}
|
|
// Test slicer::MethodInstrumenter
|
void TestMethodInstrumenter(std::shared_ptr<ir::DexFile> dex_ir) {
|
slicer::MethodInstrumenter mi(dex_ir);
|
mi.AddTransformation<slicer::EntryHook>(ir::MethodId("LTracer;", "onFooEntry"), true);
|
mi.AddTransformation<slicer::EntryHook>(ir::MethodId("LTracer;", "onFooEntry"), false);
|
mi.AddTransformation<slicer::ExitHook>(ir::MethodId("LTracer;", "onFooExit"));
|
mi.AddTransformation<slicer::DetourVirtualInvoke>(
|
ir::MethodId("LBase;", "foo", "(ILjava/lang/String;)I"),
|
ir::MethodId("LTracer;", "wrapFoo"));
|
mi.AddTransformation<slicer::DetourInterfaceInvoke>(
|
ir::MethodId("LIBase;", "bar", "(Ljava/lang/String;)V"),
|
ir::MethodId("LTracer;", "wrapBar"));
|
|
auto method1 = ir::MethodId("LTarget;", "foo", "(ILjava/lang/String;)I");
|
SLICER_CHECK(mi.InstrumentMethod(method1));
|
|
auto method2 = ir::MethodId("LTarget;", "foo", "(I[[Ljava/lang/String;)Ljava/lang/Integer;");
|
SLICER_CHECK(mi.InstrumentMethod(method2));
|
}
|
|
// Stress scratch register allocation
|
void StressScratchRegs(std::shared_ptr<ir::DexFile> dex_ir) {
|
slicer::MethodInstrumenter mi(dex_ir);
|
|
// queue multiple allocations to stress corner cases (various counts and alignments)
|
auto t1 = mi.AddTransformation<slicer::AllocateScratchRegs>(1, false);
|
auto t2 = mi.AddTransformation<slicer::AllocateScratchRegs>(1, false);
|
auto t3 = mi.AddTransformation<slicer::AllocateScratchRegs>(1);
|
auto t4 = mi.AddTransformation<slicer::AllocateScratchRegs>(20);
|
|
// apply the transformations to every single method
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code != nullptr) {
|
SLICER_CHECK(mi.InstrumentMethod(ir_method.get()));
|
SLICER_CHECK(t1->ScratchRegs().size() == 1);
|
SLICER_CHECK(t2->ScratchRegs().size() == 1);
|
SLICER_CHECK(t3->ScratchRegs().size() == 1);
|
SLICER_CHECK(t4->ScratchRegs().size() == 20);
|
}
|
}
|
}
|
|
// Sample code coverage instrumentation: on the entry of every
|
// basic block, inject a call to a tracing method:
|
//
|
// CodeCoverage.TraceBasicBlock(block_id)
|
//
|
void CodeCoverage(std::shared_ptr<ir::DexFile> dex_ir) {
|
ir::Builder builder(dex_ir);
|
slicer::AllocateScratchRegs alloc_regs(1);
|
int basic_block_id = 1;
|
|
constexpr const char* kTracerClass = "LCodeCoverage;";
|
|
// create the tracing method declaration
|
std::vector<ir::Type*> param_types { builder.GetType("I") };
|
auto ir_proto =
|
builder.GetProto(builder.GetType("V"),
|
builder.GetTypeList(param_types));
|
auto ir_method_decl =
|
builder.GetMethodDecl(builder.GetAsciiString("TraceBasicBlock"),
|
ir_proto,
|
builder.GetType(kTracerClass));
|
|
// instrument every method (except for the tracer class methods)
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code == nullptr) {
|
continue;
|
}
|
|
// don't instrument the methods of the tracer class
|
if (std::strcmp(ir_method->decl->parent->descriptor->c_str(), kTracerClass) == 0) {
|
continue;
|
}
|
|
lir::CodeIr code_ir(ir_method.get(), dex_ir);
|
lir::ControlFlowGraph cfg(&code_ir, true);
|
|
// allocate a scratch register
|
//
|
// NOTE: we're assuming this does not change the CFG!
|
// (this is the case here, but transformations which
|
// alter the basic blocks boundaries or the code flow
|
// would invalidate existing CFGs)
|
//
|
alloc_regs.Apply(&code_ir);
|
dex::u4 scratch_reg = *alloc_regs.ScratchRegs().begin();
|
|
// TODO: handle very "high" registers
|
if (scratch_reg > 0xff) {
|
printf("WARNING: can't instrument method %s.%s%s\n",
|
ir_method->decl->parent->Decl().c_str(),
|
ir_method->decl->name->c_str(),
|
ir_method->decl->prototype->Signature().c_str());
|
continue;
|
}
|
|
auto tracing_method = code_ir.Alloc<lir::Method>(ir_method_decl, ir_method_decl->orig_index);
|
|
// instrument each basic block entry point
|
for (const auto& block : cfg.basic_blocks) {
|
// generate the map of basic blocks
|
printf("%8u: mi=%u s=%u e=%u\n",
|
static_cast<dex::u4>(basic_block_id),
|
ir_method->decl->orig_index,
|
block.region.first->offset,
|
block.region.last->offset);
|
|
// find first bytecode in the basic block
|
lir::Instruction* trace_point = nullptr;
|
for (auto instr = block.region.first; instr != nullptr; instr = instr->next) {
|
trace_point = dynamic_cast<lir::Bytecode*>(instr);
|
if (trace_point != nullptr || instr == block.region.last) {
|
break;
|
}
|
}
|
SLICER_CHECK(trace_point != nullptr);
|
|
// special case: don't separate 'move-result-<kind>' from the preceding invoke
|
auto opcode = static_cast<lir::Bytecode*>(trace_point)->opcode;
|
if (opcode == dex::OP_MOVE_RESULT ||
|
opcode == dex::OP_MOVE_RESULT_WIDE ||
|
opcode == dex::OP_MOVE_RESULT_OBJECT) {
|
trace_point = trace_point->next;
|
}
|
|
// arg_reg = block_id
|
auto load_block_id = code_ir.Alloc<lir::Bytecode>();
|
load_block_id->opcode = dex::OP_CONST;
|
load_block_id->operands.push_back(code_ir.Alloc<lir::VReg>(scratch_reg));
|
load_block_id->operands.push_back(code_ir.Alloc<lir::Const32>(basic_block_id));
|
code_ir.instructions.InsertBefore(trace_point, load_block_id);
|
|
// call the tracing method
|
auto trace_call = code_ir.Alloc<lir::Bytecode>();
|
trace_call->opcode = dex::OP_INVOKE_STATIC_RANGE;
|
trace_call->operands.push_back(code_ir.Alloc<lir::VRegRange>(scratch_reg, 1));
|
trace_call->operands.push_back(tracing_method);
|
code_ir.instructions.InsertBefore(trace_point, trace_call);
|
|
++basic_block_id;
|
}
|
|
code_ir.Assemble();
|
}
|
}
|
|
// Stress the roundtrip: EncodedMethod -> MethodId -> FindMethod -> EncodedMethod
|
// NOTE: until we start indexing methods this test is slow on debug builds + large .dex images
|
void StressFindMethod(std::shared_ptr<ir::DexFile> dex_ir) {
|
ir::Builder builder(dex_ir);
|
int method_count = 0;
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
auto decl = ir_method->decl;
|
auto signature = decl->prototype->Signature();
|
auto class_descriptor = decl->parent->descriptor;
|
ir::MethodId method_id(class_descriptor->c_str(), decl->name->c_str(), signature.c_str());
|
SLICER_CHECK(builder.FindMethod(method_id) == ir_method.get());
|
++method_count;
|
}
|
printf("Everything looks fine, found %d methods.\n", method_count);
|
}
|
|
static void PrintHistogram(const std::map<int, int> histogram, const char* name) {
|
constexpr int kHistogramWidth = 100;
|
int max_count = 0;
|
for (const auto& kv : histogram) {
|
max_count = std::max(max_count, kv.second);
|
}
|
printf("\nHistogram: %s [max_count=%d]\n\n", name, max_count);
|
for (const auto& kv : histogram) {
|
printf("%6d [ %3d ] ", kv.second, kv.first);
|
int hist_len = static_cast<int>(static_cast<double>(kv.second) / max_count * kHistogramWidth);
|
for (int i = 0; i <= hist_len; ++i) {
|
printf("*");
|
}
|
printf("\n");
|
}
|
}
|
|
// Builds a histogram of registers count per method
|
void RegsHistogram(std::shared_ptr<ir::DexFile> dex_ir) {
|
std::map<int, int> regs_histogram;
|
std::map<int, int> param_histogram;
|
std::map<int, int> extra_histogram;
|
for (auto& ir_method : dex_ir->encoded_methods) {
|
if (ir_method->code != nullptr) {
|
const int regs = ir_method->code->registers;
|
const int ins = ir_method->code->ins_count;
|
SLICER_CHECK(regs >= ins);
|
regs_histogram[regs]++;
|
param_histogram[ins]++;
|
extra_histogram[regs - ins]++;
|
}
|
}
|
PrintHistogram(regs_histogram, "Method registers");
|
PrintHistogram(param_histogram, "Method parameter registers");
|
PrintHistogram(regs_histogram, "Method extra registers (total - parameters)");
|
}
|
|
void ListExperiments(std::shared_ptr<ir::DexFile> dex_ir);
|
|
using Experiment = void (*)(std::shared_ptr<ir::DexFile>);
|
|
// the registry of available experiments
|
std::map<std::string, Experiment> experiments_registry = {
|
{ "list_experiments", &ListExperiments },
|
{ "full_rewrite", &FullRewrite },
|
{ "stress_entry_hook", &StressEntryHook },
|
{ "stress_exit_hook", &StressExitHook },
|
{ "stress_wrap_invoke", &StressWrapInvoke },
|
{ "test_method_instrumenter", &TestMethodInstrumenter },
|
{ "stress_find_method", &StressFindMethod },
|
{ "stress_scratch_regs", &StressScratchRegs },
|
{ "regs_histogram", &RegsHistogram },
|
{ "code_coverage", &CodeCoverage },
|
};
|
|
// Lists all the registered experiments
|
void ListExperiments(std::shared_ptr<ir::DexFile> dex_ir) {
|
printf("\nAvailable experiments:\n");
|
printf("-------------------------\n");
|
for (auto& e : experiments_registry) {
|
printf(" %s\n", e.first.c_str());
|
}
|
printf("-------------------------\n\n");
|
}
|
|
// Driver for running experiments
|
void Run(const char* experiment, std::shared_ptr<ir::DexFile> dex_ir) {
|
auto it = experiments_registry.find(experiment);
|
if (it == experiments_registry.end()) {
|
printf("\nUnknown experiment '%s'\n", experiment);
|
ListExperiments(dex_ir);
|
exit(1);
|
}
|
|
// running the experiment entry point
|
(*it->second)(dex_ir);
|
}
|
|
} // namespace experimental
|