本文主要是在看《Inside the C++ Object Model》的时候,想通过案例的方式加深一下理解。
首先,由于编译器有内存对齐的优化,比如:
#include <iostream>
// An int followed by a char, declared without any packing pragma in effect,
// so the compiler is free to pad the object to its natural alignment.
class WithAlign {
private:
    int a;
    char b;
};
// The same two fields, but declared while the packing alignment is forced to 1 byte,
// so no padding follows the char member. The previous packing is restored afterwards.
#pragma pack(push, 1)
class WithoutAlign {
private:
    int a;
    char b;
};
#pragma pack(pop)
// Prints the size of the padded class and of the packed class, one per line.
int main() {
    std::cout << "WithAlign: ";
    std::cout << sizeof(WithAlign) << std::endl;

    std::cout << "WithoutAlign: ";
    std::cout << sizeof(WithoutAlign) << std::endl;
}
// Output:
// WithAlign: 8
// WithoutAlign: 5
内存对齐的类大小为8(int按4字节对齐,char之后填充3字节),未对齐的为5(int 4字节 + char 1字节)。为了便于理解,后文的示例全部使用1字节对齐(#pragma pack(push, 1))。
注意,本文中的内容均仅在自己的机器的Docker容器中做测试,环境为:64位 ubuntu 16.04, gcc 5.5。
# lsb_release -a
Distributor ID: Ubuntu
Description: Ubuntu 16.04.7 LTS
Release: 16.04
Codename: xenial
# uname -a
Linux 06f25c7abffd 5.15.49-linuxkit #1 SMP Tue Sep 13 07:51:46 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
# gcc --version
gcc (Ubuntu 5.5.0-12ubuntu1~16.04) 5.5.0 20171010
何为C++对象模型?
引用《深度探索C++对象模型》这本书中的话:
有两个概念可以解释C++对象模型:
- 语言中直接支持面向对象程序设计的部分。包括构造函数、析构函数、多态、虚函数等。
- 对于各种支持的底层实现机制。这部分并没有标准化,因此不同的编译器可以有自己的实现。
C++数据存放
C++变量有两种:
- static:也称为类变量、类静态变量,由同一个类的所有实例共享。
- non-static:也称为成员变量,每个类的实例均持有自己的一份。
类成员函数有三种:
- static:静态函数,属于该类,不需要实例也可以调用。
- non-static:成员函数,一般需要实例才可以调用。
- virtual:虚函数,用于实现多态。
例子:
#include <iostream>
#pragma pack(push, 1)
// Example class containing every kind of member discussed here: static and non-static
// data, plus static, non-static and virtual functions.
class Base {
public:
    // Stores d and increments the shared counter. `explicit` blocks accidental
    // implicit int -> Base conversions.
    explicit Base(int d) : data_(d) { ++instance_cnt_; }   // constructor, non-static function
    virtual ~Base() {}                                      // destructor, virtual function
    // Number of objects built through Base(int); the destructor never decrements it
    // and implicitly generated copies are not counted.
    static int getInstanceCnt() { return instance_cnt_; }   // static function
    // Read-only accessor; const so it is callable on const objects and references.
    int getData() const { return data_; }                   // non-static function
    // Prints the object's address and the value of data_.
    virtual void print() {                                  // virtual function
        std::cout << "[Base] address: " << this << " data: " << this->data_ << std::endl;
    }

public:
    int data_;                 // non-static data
    static int instance_cnt_;  // static data
};
// Out-of-class definition of the static counter; the counter starts at zero.
int Base::instance_cnt_ = 0;
#pragma pack(pop)
// Builds two Base objects, then prints sizeof(Base), the instance counter read three
// different ways (via the class and via each object), and each object's own report.
int main() {
    Base a(100);
    Base b(200);

    std::cout << "Size " << sizeof(Base);
    std::cout << " InstanceCnt " << Base::getInstanceCnt();
    std::cout << " " << a.getInstanceCnt();
    std::cout << " " << b.getInstanceCnt() << std::endl;

    a.print();
    b.print();
}
// Output:
// Size 12 InstanceCnt 2 2 2
// [Base] address: 0x7ffffe4bf670 data: 100
// [Base] address: 0x7ffffe4bf67c data: 200
存储方式:
- static data:单独存放,不计入class的size中。
- non-static data:在实例存放,计入class的size。
- static function:单独存放
- non-static function:单独存放
- virtual function:单独存放,一个具体类对应的虚函数会整合进一个表中,表中存放了虚函数的指针等信息,实例存放一到多个指向虚表的指针。
这里可以看到Base类的size为12,其实就是存放了 vptr(8字节)和 int data_(4字节)这两个数据导致的。
那么我们就通过编译器和代码,来具体看一下每一部分的数据的存放方式。
#include <iostream>
#pragma pack(push, 1)
// Same example class as before, except that the destructor logs the address of the object
// being destroyed so that destructor calls made through the vtable are visible.
class Base {
public:
    // Stores d and increments the shared counter. `explicit` blocks accidental
    // implicit int -> Base conversions.
    explicit Base(int d) : data_(d) { ++instance_cnt_; }                   // constructor, non-static function
    virtual ~Base() { std::cout << "destructor: " << this << std::endl; }  // destructor, virtual function
    // Number of objects built through Base(int); never decremented.
    static int getInstanceCnt() { return instance_cnt_; }                  // static function
    // Read-only accessor; const so it is callable on const objects and references.
    int getData() const { return data_; }                                  // non-static function
    // Prints the object's address and the value of data_.
    virtual void print() {                                                 // virtual function
        std::cout << "[Base] address: " << this << " data: " << this->data_ << std::endl;
    }

public:
    int data_;                 // non-static data
    static int instance_cnt_;  // static data
};
// Out-of-class definition of the static counter; the counter starts at zero.
int Base::instance_cnt_ = 0;
#pragma pack(pop)
// Inspects the raw memory of two heap-allocated Base objects: reads the vptr and data_
// by byte offset, dumps the vtable slots around the vptr, calls print() both normally and
// through the vtable slot, and finally destroys each object through the second destructor
// slot. The offsets (8-byte vptr, data_ at +8) match the class dump shown in the article
// for 64-bit GCC with 1-byte packing; they are not portable.
int main() {
Base *a = new Base(100);
Base *b = new Base(200);
// Object addresses are kept as plain 64-bit integers so offsets can be added directly.
uint64_t ptr_list[2] = {(uint64_t)a, (uint64_t)b};
for (int idx = 0; idx < 2; ++idx) {
uint64_t ptr = ptr_list[idx];
std::cout << "instance " << idx << " address " << (void *)ptr << std::endl;
uint64_t pointer2vptr = ptr; // address of the vptr field: the first 8 bytes of the object hold the vptr
uint64_t pointer2data = ptr + 8; // address of the data: stored right after the vptr
uint64_t vptr = *(uint64_t *)pointer2vptr; // read the actual vptr value
int data = *(int *)pointer2data; // read the actual value of data_
std::cout << " vptr address " << (void *)pointer2vptr << " vptr " << (void *)vptr << std::endl;
std::cout << " data address " << (void *)pointer2data << " data " << data << std::endl;
// About the vtable:
// 1. The vtable stores several pointers, in this order: offset, type_info, virtual_func1, virtual_func2, ...
// 2. Virtual functions appear in the vtable in the order they are declared
// 3. The instance's vptr points at the first virtual_func, not at the real start of the vtable
// 4. GCC generates two virtual functions for a virtual destructor
uint64_t *vtable = (uint64_t *)vptr; // strictly speaking the vtable starts at vptr - 16
uint64_t offset_ptr = vtable[-2];
uint64_t type_info_ptr = vtable[-1];
uint64_t destructor1_ptr = vtable[0]; // destructor that does not call delete()
uint64_t destructor2_ptr = vtable[1]; // destructor that calls delete() after destroying the object
uint64_t print_fn_ptr = vtable[2];
std::cout << "vtable address " << vtable << std::endl;
std::cout << " offset address " << (void *)offset_ptr << std::endl;
std::cout << " type_info address " << (void *)type_info_ptr << std::endl;
std::cout << " destructor1 address " << (void *)destructor1_ptr << std::endl;
std::cout << " destructor2 address " << (void *)destructor2_ptr << std::endl;
std::cout << " print_fn address " << (void *)print_fn_ptr << std::endl;
typedef void (*print_fn_type)(void *); // type used to call print(): takes one pointer (the object) and returns nothing
// call print
std::cout << "call from instance:" << std::endl;
((Base *)ptr)->print();
std::cout << "call from vtable:" << std::endl;
((print_fn_type)print_fn_ptr)((void *)ptr);
typedef void (*del_fn)(void *);
((del_fn)(destructor2_ptr))((void *)ptr); // call the second destructor slot here (this is the only place the object is released)
std::cout << std::endl;
}
}
// Output:
// instance 0 address 0x60200000eff0
// vptr address 0x60200000eff0 vptr 0x401fd8
// data address 0x60200000eff8 data 100
// vtable address 0x401fd8
// offset address 0
// type_info address 0x401ff0
// destructor1 address 0x4017c6
// destructor2 address 0x401890
// print_fn address 0x4018b6
// call from instance:
// [Base] address: 0x60200000eff0 data: 100
// call from vtable:
// [Base] address: 0x60200000eff0 data: 100
// destructor: 0x60200000eff0
// instance 1 address 0x60200000efd0
// vptr address 0x60200000efd0 vptr 0x401fd8
// data address 0x60200000efd8 data 200
// vtable address 0x401fd8
// offset address 0
// type_info address 0x401ff0
// destructor1 address 0x4017c6
// destructor2 address 0x401890
// print_fn address 0x4018b6
// call from instance:
// [Base] address: 0x60200000efd0 data: 200
// call from vtable:
// [Base] address: 0x60200000efd0 data: 200
// destructor: 0x60200000efd0
编译命令:
g++ -O0 -std=c++11 -fdump-class-hierarchy -fsanitize=address memory.cpp
- -O0:表示不做编译器优化
- -fdump-class-hierarchy:会dump出内存布局
- -std=c++11:使用C++11标准
- -fsanitize=address:开启内存检查
和网上的很多写法有一点点不同,这里为了避免各种复杂的指针转换,就直接用 uint64_t(测试环境是64位的)来存指针和计算了,只在打印地址的时候转换成 void *。
具体的字段偏移量的分析,在注释中有详细的解释,这里不再重复了。仅做出整体的分析。
Dump出的内存布局:
Vtable for Base
Base::_ZTV4Base: 5u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI4Base)
16 (int (*)(...))Base::~Base
24 (int (*)(...))Base::~Base
32 (int (*)(...))Base::print
Class Base
size=12 align=1
base size=12 base align=1
Base (0x0x7f291b569360) 0
vptr=((& Base::_ZTV4Base) + 16u)
可以清楚的看出,Base是12字节,按照1字节对齐。vptr指向了虚表首地址+16的位置。并且有两个 Base::~Base 的虚函数。
上面程序的输出也可以看出:
- 两个Instance本身的地址和vptr/data的地址均不同,说明这部分数据确实是存放在实例本身的。
- 虚表和虚函数的地址都不变,说明被所有实例共享。
- 类的成员函数本质上也是普通函数,只是默认有了个this指针,通过vtable的直接调用也可以证实。
- 虚析构函数会生成两个虚函数:前者只析构对象但不调用 operator delete,相当于手动调用析构函数 obj->~Base();后者在析构之后还会调用 operator delete 释放内存,相当于 delete obj。将案例中的析构改为调用第一个的话,就会报内存泄露的错误了。参考 Itanium C++ ABI。
继承
单继承
#include <iostream>
#pragma pack(push, 1)
// Root of the single-inheritance chain: one int of data, a virtual destructor and a
// virtual print().
class A {
public:
    A(int value) : data_(value) {}  // constructor, non-static function

    // destructor, virtual function: logs the address of the object being destroyed
    virtual ~A() {
        std::cout << "[A] destructor: " << this << std::endl;
    }

    // virtual function: prints the object address, then the address and value of data_
    virtual void print() {
        std::cout << "[A] address: " << this;
        std::cout << " a: " << &data_ << " " << data_;
        std::cout << std::endl;
    }

    int data_;  // non-static data
};
// Derives from A, adds its own data_ (a separate member that hides A::data_) and one new
// virtual function, printB(). A::print() is inherited unchanged.
class B : public A {
public:
    B(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this << std::endl;
    }

    // Prints this, then the address/value of the inherited A::data_ and of B's own data_.
    virtual void printB() {
        std::cout << "[B] address: " << this;                             // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_;  // A
        std::cout << " b: " << &data_ << " " << data_;                    // B
        std::cout << std::endl;
    }

    int data_;
};
// Derives from B, adds a third data_ member and overrides the print() inherited from A.
class C : public B {
public:
    C(int a, int b, int c) : B(a, b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this << std::endl;
    }

    // Prints this, then A::data_ reached through two different qualified names,
    // then B::data_ and finally C's own data_ (address and value each time).
    virtual void print() {
        std::cout << "[C] address: " << this;                                    // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_;
        std::cout << " " << &this->B::A::data_ << " " << this->B::A::data_;      // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_;         // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }

    int data_;
};
#pragma pack(pop)
int main() {
std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << std::endl;
A *a = new A(100);
A *b = new B(100, 200);
A *c = new C(100, 200, 300);
a->print(); // A::print
b->print(); // A:print
((B *)b)->printB(); // B:printB
c->print(); // C:print
((B *)c)->printB(); // B:printB
delete a;
delete b;
delete c;
}
// Output:
// Sizeof: A 12 B 16 C 20
// [A] address: 0x142f030 a: 0x142f038 100
// [A] address: 0x142f050 a: 0x142f058 100
// [B] address: 0x142f050 a: 0x142f058 100 b: 0x142f05c 200
// [C] address: 0x142f070 a: 0x142f078 100 0x142f078 100 b: 0x142f07c 200 c: 0x142f080 300
// [B] address: 0x142f070 a: 0x142f078 100 b: 0x142f07c 200
// [A] destructor: 0x142f030
// [B] destructor: 0x142f050
// [A] destructor: 0x142f050
// [C] destructor: 0x142f070
// [B] destructor: 0x142f070
// [A] destructor: 0x142f070
编译命令:
g++ -fdump-class-hierarchy single_inheritance.cpp
首先查看内存布局:
Vtable for A
A::_ZTV1A: 5u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1A)
16 (int (*)(...))A::~A
24 (int (*)(...))A::~A
32 (int (*)(...))A::print
Class A
size=12 align=1
base size=12 base align=1
A (0x0x7f12dea0c7e0) 0
vptr=((& A::_ZTV1A) + 16u)
Vtable for B
B::_ZTV1B: 6u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1B)
16 (int (*)(...))B::~B
24 (int (*)(...))B::~B
32 (int (*)(...))A::print
40 (int (*)(...))B::printB
Class B
size=16 align=1
base size=16 base align=1
B (0x0x7f12de71bb60) 0
vptr=((& B::_ZTV1B) + 16u)
A (0x0x7f12dea0c840) 0
primary-for B (0x0x7f12de71bb60)
Vtable for C
C::_ZTV1C: 6u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1C)
16 (int (*)(...))C::~C
24 (int (*)(...))C::~C
32 (int (*)(...))C::print
40 (int (*)(...))B::printB
Class C
size=20 align=1
base size=20 base align=1
C (0x0x7f12de71bbc8) 0
vptr=((& C::_ZTV1C) + 16u)
B (0x0x7f12de71bc30) 0
primary-for C (0x0x7f12de71bbc8)
A (0x0x7f12dea0c8a0) 0
primary-for B (0x0x7f12de71bc30)
存储:
- A和之前的Base一样,没有啥好说的。大小 12 = vptr + int。
- B继承A。同时B也定义了自己的成员变量(虽然和A的同名,但二者不是同一个变量,可以通过 obj->A::data_ 来访问父类的成员)。因此大小是 16 = vptr + A::int + B::int。
- C继承B。也定义了自己的成员变量。因此大小是 20 = vptr + A::int + B::int + C::int。
- 通过每个 print 和 printB 的打印结果可以看出,派生类先存放了自己的基类的数据,之后才存放自己的数据。
虚表:
每个类都有且只有一个虚表对象。
- A和Base一样就不解释了。
- B继承了A的 print 方法,同时自己又定义了 printB 方法,因此B复制了A的虚表结构,除了改了析构函数的地址外,还新增了 printB 的指针。
- C继承了B,同时覆盖了 print 方法。因此C复制了B的虚表,修改了析构函数,并修改了 print 函数的指针。
- 可以总结个规律:单继承下,派生类有且只有一个虚表,相当于直接将基类的虚表复制一次,替换掉自己覆盖的虚函数,并追加自己新增的虚函数。
多继承
多继承比单继承复杂了很多。而且多继承一直被很多人诟病,像Java就直接不支持多继承。这里我们不考虑基类重名等情况。
#include <iostream>
#pragma pack(push, 1)
// First base class of the multiple-inheritance example: one int of data, a virtual
// destructor and two virtual print functions that differ only in their tag.
class A {
public:
    A(int value) : data_(value) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this << std::endl;
    }

    // Prints the object address, then the address and value of data_, tagged [A1].
    virtual void printA1() { report("[A1] address: "); }

    // Same report, tagged [A2].
    virtual void printA2() { report("[A2] address: "); }

    int data_;

private:
    // Shared body of printA1()/printA2(); non-virtual, so it adds no vtable entry.
    void report(const char *tag) {
        std::cout << tag << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }
};
// Second, unrelated base class of the multiple-inheritance example: one int of data, a
// virtual destructor and two virtual print functions that differ only in their tag.
class B {
public:
    B(int value) : data_(value) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this << std::endl;
    }

    // Prints the object address, then the address and value of data_, tagged [B1].
    virtual void printB1() { report("[B1] address: "); }

    // Same report, tagged [B2].
    virtual void printB2() { report("[B2] address: "); }

    int data_;

private:
    // Shared body of printB1()/printB2(); non-virtual, so it adds no vtable entry.
    void report(const char *tag) {
        std::cout << tag << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }
};
// Inherits from A first and B second, adds its own data_, overrides B::printB2() and
// introduces one new virtual function, printC().
class C : public A, public B {
public:
    C(int a, int b, int c) : A(a), B(b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this << std::endl;
    }

    // Override of B::printB2(): prints this plus all three data_ members.
    virtual void printB2() { dump("[C B2] address: "); }

    // New virtual function: same report, tagged [C].
    virtual void printC() { dump("[C] address: "); }

    int data_;

private:
    // Shared body of printB2()/printC(): address and value of A::data_, B::data_ and
    // C's own data_. Non-virtual, so it adds no vtable entry.
    void dump(const char *tag) {
        std::cout << tag << this;                                                // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};
#pragma pack(pop)
// Creates one C object and calls its virtual functions through C*, and through A* / B*
// pointers obtained with dynamic_cast, static_cast and reinterpret_cast, printing the
// pointer value produced by each cast so the address adjustment can be compared.
int main() {
std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << std::endl;
C *c = new C(100, 200, 300);
std::cout << "\ncall from C*" << std::endl;
c->printA1(); // A::printA1
c->printA2(); // A::printA2
c->printB1(); // B::printB1
c->printB2(); // C::printB2
c->printC(); // C::printC
// A is the first base: per the recorded output all three casts yield the same address as c.
std::cout << "\ncall from dynamic_cast A* " << dynamic_cast<A *>(c) << std::endl;
dynamic_cast<A *>(c)->printA1();
dynamic_cast<A *>(c)->printA2();
std::cout << "\ncall from static_cast A*" << static_cast<A *>(c) << std::endl;
static_cast<A *>(c)->printA1();
static_cast<A *>(c)->printA2();
std::cout << "\ncall from reinterpret_cast A*" << reinterpret_cast<A *>(c) << std::endl;
reinterpret_cast<A *>(c)->printA1();
reinterpret_cast<A *>(c)->printA2();
// B is the second base: per the recorded output dynamic_cast/static_cast move the pointer
// 12 bytes forward to the B subobject.
std::cout << "\ncall from dynamic_cast B*" << dynamic_cast<B *>(c) << std::endl;
dynamic_cast<B *>(c)->printB1();
dynamic_cast<B *>(c)->printB2();
std::cout << "\ncall from static_cast B*" << static_cast<B *>(c) << std::endl;
static_cast<B *>(c)->printB1();
static_cast<B *>(c)->printB2();
// reinterpret_cast leaves the address unchanged, so the B* still points at the start of the
// C object. In the recorded output these two calls end up running A::printA1/A::printA2.
// NOTE(review): calling through this pointer is undefined behavior; it is kept only to
// demonstrate the missing pointer adjustment.
std::cout << "\ncall from reinterpret_cast B*" << reinterpret_cast<B *>(c) << std::endl;
reinterpret_cast<B *>(c)->printB1();
reinterpret_cast<B *>(c)->printB2();
delete c;
}
// Output:
// Sizeof: A 12 B 12 C 28
// call from C*
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100 b: 0x183a044 200 c: 0x183a048 300
// [C] address: 0x183a030 a: 0x183a038 100 b: 0x183a044 200 c: 0x183a048 300
// call from dynamic_cast A* 0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// call from static_cast A*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// call from reinterpret_cast A*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// call from dynamic_cast B*0x183a03c
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100 b: 0x183a044 200 c: 0x183a048 300
// call from static_cast B*0x183a03c
// [B1] address: 0x183a03c 0x183a044 200
// [C B2] address: 0x183a030 a: 0x183a038 100 b: 0x183a044 200 c: 0x183a048 300
// call from reinterpret_cast B*0x183a030
// [A1] address: 0x183a030 0x183a038 100
// [A2] address: 0x183a030 0x183a038 100
// [C] destructor: 0x183a030
// [B] destructor: 0x183a03c
// [A] destructor: 0x183a030
编译命令:
g++ -fdump-class-hierarchy single_inheritance.cpp
先查看内存布局:
Vtable for A
A::_ZTV1A: 6u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1A)
16 (int (*)(...))A::~A
24 (int (*)(...))A::~A
32 (int (*)(...))A::printA1
40 (int (*)(...))A::printA2
Class A
size=12 align=1
base size=12 base align=1
A (0x0x7f640b38b7e0) 0
vptr=((& A::_ZTV1A) + 16u)
Vtable for B
B::_ZTV1B: 6u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1B)
16 (int (*)(...))B::~B
24 (int (*)(...))B::~B
32 (int (*)(...))B::printB1
40 (int (*)(...))B::printB2
Class B
size=12 align=1
base size=12 base align=1
B (0x0x7f640b38b840) 0
vptr=((& B::_ZTV1B) + 16u)
Vtable for C
C::_ZTV1C: 14u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1C)
16 (int (*)(...))C::~C
24 (int (*)(...))C::~C
32 (int (*)(...))A::printA1
40 (int (*)(...))A::printA2
48 (int (*)(...))C::printB2
56 (int (*)(...))C::printC
64 (int (*)(...))-12
72 (int (*)(...))(& _ZTI1C)
80 (int (*)(...))C::_ZThn12_N1CD1Ev
88 (int (*)(...))C::_ZThn12_N1CD0Ev
96 (int (*)(...))B::printB1
104 (int (*)(...))C::_ZThn12_N1C7printB2Ev
Class C
size=28 align=1
base size=28 base align=1
C (0x0x7f640b0e25b0) 0
vptr=((& C::_ZTV1C) + 16u)
A (0x0x7f640b38b8a0) 0
primary-for C (0x0x7f640b0e25b0)
B (0x0x7f640b38b900) 12
vptr=((& C::_ZTV1C) + 80u)
- A、B本身没啥好说的。占用空间都是 12 = vptr + int。
- C继承A和B,顺序是先A再B。C中存放了A、B的数据,并且有两个虚指针(后续解释),因此大小为 28 = vptr + A::int + vptr + B::int + C::int。
- 根据C打印的结果,可以看出:
  - A::data 的地址偏移了8,前面存放了 vptr。
  - B::data 的地址偏移了20(B子对象本身从偏移12开始),前面存放了 vptr、A::data 和第二个 vptr。
  - C::data 的地址偏移了24,前面存放了 vptr、A::data、vptr、B::data 的数据。
- 可以看出,派生类存放数据顺序为:虚指针1 + 基类1的数据 + 虚指针2 + 基类2的数据 + ... + 自己的数据。另外如果继承的多个类,有的有虚函数有的没有,则优先存有虚函数的。参考
虚表:
- A、B的虚表很简单。
- C的虚表有两个,存储上是连续的,每个虚表都是 offset + type_info + func1 + func2 + ... 的顺序。
- C有两个虚指针,第一个的偏移量是0,第二个是 12,正好是 vptr + int 的大小。
- 第一个虚指针指向第一个虚表的第一个虚函数,第二个虚指针指向了第二个虚表的第一个虚函数。
- 虚函数调用,对于C的实例,如果转换为其他类型指针,则分情况:
  - A* 指针,由于偏移量是0,所以直接复用C的第一个表即可。
  - B* 指针,通过 static_cast 或者 dynamic_cast 可以得到 B* 指针,此时指针指向了原先实例偏移12字节的位置,其实也就是C中基类B的地址。在调用时,严格对应自己的虚表来执行(此时自己的虚表其实是C的第二个虚表)。
    - 对于 printB1 的调用,其实就是调用原生B的函数,因此直接调用即可。
    - 对于 printB2 的调用,由于C进行了覆盖,这里的虚表对应的函数是 _ZThn12_N1C7printB2Ev,通过工具解析 c++filt _ZThn12_N1C7printB2Ev --> non-virtual thunk to C::printB2(),这是编译器生成的新函数,调用它相当于先把当前this指针修正(减去12字节,指回C对象的起始地址)以后,再调用 C::printB2()。
  - 对于 C* 指针,对于A和C的函数,直接使用第一个虚表来调用。对于B的函数,相当于先自动转换为 B* 类型,再调用。
最后,让我们直接通过虚表来调用这里所有的函数,以及强制调用被覆盖的原始函数。
#include <iostream>
#pragma pack(push, 1)
// First base class of the vtable-call example: one int of data, a virtual destructor and
// two virtual print functions that differ only in their tag.
class A {
public:
    A(int value) : data_(value) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this << std::endl;
    }

    // Prints the object address, then the address and value of data_, tagged [A1].
    virtual void printA1() { report("[A1] address: "); }

    // Same report, tagged [A2].
    virtual void printA2() { report("[A2] address: "); }

    int data_;

private:
    // Shared body of printA1()/printA2(); non-virtual, so it adds no vtable entry.
    void report(const char *tag) {
        std::cout << tag << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }
};
// Second base class of the vtable-call example: one int of data, a virtual destructor and
// two virtual print functions that differ only in their tag.
class B {
public:
    B(int value) : data_(value) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this << std::endl;
    }

    // Prints the object address, then the address and value of data_, tagged [B1].
    virtual void printB1() { report("[B1] address: "); }

    // Same report, tagged [B2].
    virtual void printB2() { report("[B2] address: "); }

    int data_;

private:
    // Shared body of printB1()/printB2(); non-virtual, so it adds no vtable entry.
    void report(const char *tag) {
        std::cout << tag << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }
};
// Inherits from A first and B second, adds its own data_, overrides B::printB2() and
// introduces one new virtual function, printC().
class C : public A, public B {
public:
    C(int a, int b, int c) : A(a), B(b), data_(c) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this << std::endl;
    }

    // Override of B::printB2(): prints this plus all three data_ members.
    virtual void printB2() { dump("[C B2] address: "); }

    // New virtual function: same report, tagged [C].
    virtual void printC() { dump("[C] address: "); }

    int data_;

private:
    // Shared body of printB2()/printC(): address and value of A::data_, B::data_ and
    // C's own data_. Non-virtual, so it adds no vtable entry.
    void dump(const char *tag) {
        std::cout << tag << this;                                                // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &this->B::data_ << " " << this->B::data_ << " ";  // B
        std::cout << " c: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};
#pragma pack(pop)
// Shape used to call a vtable entry as if it were a free function: the object pointer is
// passed explicitly as the only argument and nothing is returned.
typedef void (*print_fn)(void *);

// Treats the first 8 bytes at ptr as the address of a table of 8-byte entries (the vptr of
// a polymorphic object in this example) and returns entry idx of that table as a print_fn.
// No bounds or validity checks are performed.
print_fn get_func(void *ptr, int idx) {
    const uint64_t table_address = *(uint64_t *)ptr;
    const uint64_t *slots = (const uint64_t *)table_address;
    return (print_fn)slots[idx];
}
// Calls each virtual function of one C object in several ways: through C*, through the
// matching base pointer, and directly through vtable slots fetched with get_func().
// Slot numbers are relative to the vptr and follow the vtable dump shown in the article
// (GCC, 64-bit, 1-byte packing): for C's primary table 2 = A::printA1, 3 = A::printA2,
// 4 = C::printB2, and slot 10 lands in the secondary (B-in-C) table on B::printB1.
// Byte offsets are computed on char*; arithmetic on void* is a GNU extension and is
// not valid ISO C++.
int main() {
    C *c = new C(100, 200, 300);
    // printA1
    {
        std::cout << "A::printA1" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printA1();
        A *a = dynamic_cast<A *>(c);
        std::cout << "call from A* " << a << std::endl;
        a->printA1();
        std::cout << "call from vtable" << std::endl;
        get_func(c, 2)(c);
    }
    // printA2
    {
        std::cout << "A::printA2" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printA2();
        A *a = dynamic_cast<A *>(c);
        std::cout << "call from A* " << a << std::endl;
        a->printA2();
        std::cout << "call from vtable" << std::endl;
        get_func(c, 3)(c);
    }
    // printB1
    {
        std::cout << "B::printB1" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printB1();
        std::cout << "call from B*" << std::endl;
        B *b = dynamic_cast<B *>(c);
        b->printB1();
        std::cout << "call from B vtable" << std::endl;
        get_func(b, 2)(b);
        std::cout << "call from C vtable" << std::endl;
        // B::printB1 expects `this` to be the B subobject, which sits 12 bytes into C.
        get_func(c, 10)((char *)c + 12);
    }
    // printB2
    {
        std::cout << "C::printB2" << std::endl;
        std::cout << "call from C* " << c << std::endl;
        c->printB2();
        B *b = dynamic_cast<B *>(c);
        std::cout << "call from B* " << b << std::endl;
        b->printB2();
        std::cout << "call from B vtable" << std::endl;
        // Slot 3 of the B-in-C table is the thunk that adjusts `this` before C::printB2.
        get_func(b, 3)(b);
        std::cout << "call from C vtable" << std::endl;
        // C::printB2 expects `this` to be the C object itself: pass c, or b moved back 12 bytes.
        get_func(c, 4)(c);
        get_func(c, 4)((char *)b - 12);
    }
    // force call B::printB2
    {
        B *b = dynamic_cast<B *>(c);
        std::cout << "call from B vtable C::printB2" << std::endl;
        get_func(b, 3)(b);
        std::cout << "force call B::printB2" << std::endl;
        // A standalone B object supplies B's own vtable, whose slot 3 is the original
        // B::printB2; it is then invoked on the B subobject of c.
        B tmp(100);
        get_func(&tmp, 3)(b);
    }
    delete c;
}
// Output:
// A::printA1
// call from C* 0x15b7c20
// [A1] address: 0x15b7c20 0x15b7c28 100
// call from A* 0x15b7c20
// [A1] address: 0x15b7c20 0x15b7c28 100
// call from vtable
// [A1] address: 0x15b7c20 0x15b7c28 100
// A::printA2
// call from C* 0x15b7c20
// [A2] address: 0x15b7c20 0x15b7c28 100
// call from A* 0x15b7c20
// [A2] address: 0x15b7c20 0x15b7c28 100
// call from vtable
// [A2] address: 0x15b7c20 0x15b7c28 100
// B::printB1
// call from C* 0x15b7c20
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from B*
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from B vtable
// [B1] address: 0x15b7c2c 0x15b7c34 200
// call from C vtable
// [B1] address: 0x15b7c2c 0x15b7c34 200
// C::printB2
// call from C* 0x15b7c20
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// call from B* 0x15b7c2c
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// call from B vtable
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// call from C vtable
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// call from B vtable C::printB2
// [C B2] address: 0x15b7c20 a: 0x15b7c28 100 b: 0x15b7c34 200 c: 0x15b7c38 300
// force call B::printB2
// [B2] address: 0x15b7c2c 0x15b7c34 200
// [B] destructor: 0x7ffe9a25c23c
// [C] destructor: 0x15b7c20
// [B] destructor: 0x15b7c2c
// [A] destructor: 0x15b7c20
菱形继承
#include <iostream>
#pragma pack(push, 1)
// Common base of the diamond example: one int of data, a virtual destructor and two
// virtual print functions that differ only in their tag.
class A {
public:
    A(int value) : data_(value) {}

    // Logs the address of the object being destroyed.
    virtual ~A() {
        std::cout << "[A] destructor: " << this << std::endl;
    }

    // Prints the object address, then the address and value of data_, tagged [A1].
    virtual void printA1() { report("[A1] address: "); }

    // Same report, tagged [A2].
    virtual void printA2() { report("[A2] address: "); }

    int data_;

private:
    // Shared body of printA1()/printA2(); non-virtual, so it adds no vtable entry.
    void report(const char *tag) {
        std::cout << tag << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }
};
// Left side of the diamond: derives (non-virtually) from A, overrides printA1(), adds its
// own data_ and two new virtual functions.
class B : public A {
public:
    B(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~B() {
        std::cout << "[B] destructor: " << this << std::endl;
    }

    // Override of A::printA1(); note that it reports B's own data_, not A::data_.
    virtual void printA1() {
        std::cout << "[B A1] address: " << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }

    // Prints this plus A::data_ and B's data_, tagged [B1].
    virtual void printB1() { dump("[B1] address: "); }

    // Same report, tagged [B2].
    virtual void printB2() { dump("[B2] address: "); }

    int data_;

private:
    // Shared body of printB1()/printB2(); non-virtual, so it adds no vtable entry.
    void dump(const char *tag) {
        std::cout << tag << this;                                                // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // B
        std::cout << std::endl;
    }
};
// Right side of the diamond: derives (non-virtually) from A, overrides printA2(), adds its
// own data_ and two new virtual functions.
class C : public A {
public:
    C(int a, int b) : A(a), data_(b) {}

    // Logs the address of the object being destroyed.
    virtual ~C() {
        std::cout << "[C] destructor: " << this << std::endl;
    }

    // Override of A::printA2(); note that it reports C's own data_, not A::data_.
    virtual void printA2() {
        std::cout << "[C A2] address: " << this;
        std::cout << " " << &data_ << " " << data_ << std::endl;
    }

    // Prints this plus A::data_ and C's data_ (labelled " b: " in the output), tagged [C1].
    virtual void printC1() { dump("[C1] address: "); }

    // Same report, tagged [C2].
    virtual void printC2() { dump("[C2] address: "); }

    int data_;

private:
    // Shared body of printC1()/printC2(); non-virtual, so it adds no vtable entry.
    void dump(const char *tag) {
        std::cout << tag << this;                                                // this
        std::cout << " a: " << &this->A::data_ << " " << this->A::data_ << " ";  // A
        std::cout << " b: " << &data_ << " " << data_;                           // C
        std::cout << std::endl;
    }
};
// Bottom of the diamond: inherits from B and C, each of which carries its own A.
// a1/b initialize the B side (B::A::data_, B::data_), a2/c initialize the C side
// (C::A::data_, C::data_), and d initializes D's own data_.
class D : public B, public C {
public:
    D(int a1, int a2, int b, int c, int d)
        : B(a1, b),
          C(a2, c),
          data_(d) {}

    int data_;
};
#pragma pack(pop)
int main() {
std::cout << "Sizeof: A " << sizeof(A) << " B " << sizeof(B) << " C " << sizeof(C) << " D " << sizeof(D)
<< std::endl;
D* d = new D(100, 200, 300, 400, 500);
// d->printA1();
// dynamic_cast<A*>(d)->printA1();
dynamic_cast<B*>(d)->printA1();
dynamic_cast<C*>(d)->printA1();
// d->printA2();
// dynamic_cast<A*>(d)->printA2();
dynamic_cast<B*>(d)->printA2();
dynamic_cast<C*>(d)->printA2();
}
// Output:
// Sizeof: A 12 B 16 C 16 D 36
// [B A1] address: 0x1fc7030 0x1fc703c 300
// [A1] address: 0x1fc7040 0x1fc7048 200
// [A2] address: 0x1fc7030 0x1fc7038 100
// [C A2] address: 0x1fc7040 0x1fc704c 400
内存:
Vtable for A
A::_ZTV1A: 6u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1A)
16 (int (*)(...))A::~A
24 (int (*)(...))A::~A
32 (int (*)(...))A::printA1
40 (int (*)(...))A::printA2
Class A
size=12 align=1
base size=12 base align=1
A (0x0x7f0d6f469360) 0
vptr=((& A::_ZTV1A) + 16u)
Vtable for B
B::_ZTV1B: 8u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1B)
16 (int (*)(...))B::~B
24 (int (*)(...))B::~B
32 (int (*)(...))B::printA1
40 (int (*)(...))A::printA2
48 (int (*)(...))B::printB1
56 (int (*)(...))B::printB2
Class B
size=16 align=1
base size=16 base align=1
B (0x0x7f0d6f4add68) 0
vptr=((& B::_ZTV1B) + 16u)
A (0x0x7f0d6f4694e0) 0
primary-for B (0x0x7f0d6f4add68)
Vtable for C
C::_ZTV1C: 8u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1C)
16 (int (*)(...))C::~C
24 (int (*)(...))C::~C
32 (int (*)(...))A::printA1
40 (int (*)(...))C::printA2
48 (int (*)(...))C::printC1
56 (int (*)(...))C::printC2
Class C
size=16 align=1
base size=16 base align=1
C (0x0x7f0d6f4ade38) 0
vptr=((& C::_ZTV1C) + 16u)
A (0x0x7f0d6f4695a0) 0
primary-for C (0x0x7f0d6f4ade38)
Vtable for D
D::_ZTV1D: 16u entries
0 (int (*)(...))0
8 (int (*)(...))(& _ZTI1D)
16 (int (*)(...))D::~D
24 (int (*)(...))D::~D
32 (int (*)(...))B::printA1
40 (int (*)(...))A::printA2
48 (int (*)(...))B::printB1
56 (int (*)(...))B::printB2
64 (int (*)(...))-16
72 (int (*)(...))(& _ZTI1D)
80 (int (*)(...))D::_ZThn16_N1DD1Ev
88 (int (*)(...))D::_ZThn16_N1DD0Ev
96 (int (*)(...))A::printA1
104 (int (*)(...))C::printA2
112 (int (*)(...))C::printC1
120 (int (*)(...))C::printC2
Class D
size=36 align=1
base size=36 base align=1
D (0x0x7f0d6f524a10) 0
vptr=((& D::_ZTV1D) + 16u)
B (0x0x7f0d6f4adf08) 0
primary-for D (0x0x7f0d6f524a10)
A (0x0x7f0d6f469660) 0
primary-for B (0x0x7f0d6f4adf08)
C (0x0x7f0d6f4adf70) 16
vptr=((& D::_ZTV1D) + 80u)
A (0x0x7f0d6f4696c0) 16
primary-for C (0x0x7f0d6f4adf70)
- A, B, C三个的大小之前已经解释过了。
- D同时继承B和C,这里B和C中各有一份A的数据。D中认为B、C中的A数据是不同的,因此D中存在两份A的数据,可以称为 B::A 和 C::A。同时D有两个虚表。所以内存为 36 = B::vptr + B::A::int + B::int + C::vptr + C::A::int + C::int + D::int。
虚表:
- 这里也和多继承一样,有两个虚表。
- 其中由于D中有两个A的数据,对应的两个虚表中的A的函数的指针就不做偏移。也就是说,如果转换为 B* 指针,则控制 B::A 的数据,反之亦然。
- D不能直接访问A的任何变量和函数,因为D中存在两份A子对象(B::A 和 C::A,且B、C又分别覆盖了其中的函数),编译器无法确定应当使用哪一份,会报错 ambiguous。
具体使用虚表来调用的例子就不给出了,对照内存布局,很容易看出来。
虚继承
虚继承是为了解决菱形继承而存在的。由于C++本身就不推荐多继承,出现菱形继承的情况又更少了。我们只需要知道虚继承可以减少内存的占用即可。
#include <iostream>
#pragma pack(push, 1)
// Shared virtual base of the virtual-inheritance example: a single int and a virtual
// destructor that logs the address of the object being destroyed.
class A {
public:
    // a_ starts at 0 instead of being left uninitialized.
    A() : a_(0) {}
    virtual ~A() { std::cout << "[A] destructor: " << this << std::endl; }

public:
    int a_;
};
// Inherits A virtually, so a class that combines B with another virtual heir of A
// ends up with a single shared A subobject. Adds one int of its own.
class B : virtual public A {
public:
    // b_ starts at 0 instead of being left uninitialized.
    B() : b_(0) {}
    virtual ~B() { std::cout << "[B] destructor: " << this << std::endl; }

public:
    int b_;
};
// Inherits A virtually, so a class that combines C with another virtual heir of A
// ends up with a single shared A subobject. Adds one int of its own.
class C : virtual public A {
public:
    // c_ starts at 0 instead of being left uninitialized.
    C() : c_(0) {}
    virtual ~C() { std::cout << "[C] destructor: " << this << std::endl; }

public:
    int c_;
};
// Joins B and C. Because both inherit A virtually, D holds exactly one A subobject.
// The destructor logs the address of the object being destroyed; it is virtual because
// the base-class destructors are.
class D : public B, public C {
public:
    // d_ starts at 0 instead of being left uninitialized.
    D() : d_(0) {}
    ~D() {
        std::cout << "[D] destructor: " << this << std::endl;
    }

public:
    int d_;
};
#pragma pack(pop)
// Prints the four class sizes, then writes a_ through three different qualified names and
// reads it back through the same three names.
int main() {
    std::cout << "Sizeof: A " << sizeof(A);
    std::cout << " B " << sizeof(B);
    std::cout << " C " << sizeof(C);
    std::cout << " D " << sizeof(D) << std::endl;

    D d;
    d.a_ = 1;
    d.B::a_ = 2;
    d.C::a_ = 3;

    // All three names denote the same single A subobject, so the three values are equal.
    std::cout << d.a_ << " ";
    std::cout << d.B::a_ << " ";
    std::cout << d.C::a_ << std::endl;
}
// Output:
// Sizeof: A 12 B 24 C 24 D 40
// 3 3 3
// [D] destructor: 0x7ffd41e292a0
// [C] destructor: 0x7ffd41e292ac
// [B] destructor: 0x7ffd41e292a0
// [A] destructor: 0x7ffd41e292bc
这种情况下,D中只有一份A的数据。D中有3个虚指针以及A、B、C、D自身的4个int,因此大小为 40 = vptr x 3 + int x 4。但是dump出的内存结构很复杂,感兴趣的同学可以自行测试一下。
获取函数的地址
最后,补充一下如何在代码中直接获取函数的地址。
#include <iostream>
// Plain free function: returns the sum of its two arguments.
int add(int a, int b) {
    const int sum = a + b;
    return sum;
}
// Example class offering one function of each kind whose address is taken in this example:
// non-virtual member, virtual member and static member.
class Base {
public:
    Base(int initial) : data_(initial) {}

    // Non-virtual member: data_ plus both arguments.
    int add1(int a, int b) {
        int total = data_ + a;
        total += b;
        return total;
    }

    // Virtual member with the same arithmetic. It is declared before the virtual
    // destructor, so it is the first virtual function of the class.
    virtual int add2(int a, int b) {
        int total = data_ + a;
        total += b;
        return total;
    }

    // Static member: sum of the two arguments; no object is involved.
    static int add(int a, int b) {
        return a + b;
    }

    virtual ~Base() {}

    int data_;
};
// Treats the first 8 bytes at ptr as the address of a table of 8-byte entries (the vptr of
// a polymorphic object in this example) and returns the raw value of entry idx.
// No bounds or validity checks are performed.
uint64_t get_func(void *ptr, int idx) {
    const uint64_t table_address = *(uint64_t *)ptr;
    const uint64_t *slots = (const uint64_t *)table_address;
    return slots[idx];
}
// Obtains the address of each kind of function (free, static member, non-virtual member,
// virtual member) and calls it both normally and through the obtained pointer, printing
// the address and both results.
// NOTE(review): converting a pointer-to-member-function to void* or to a plain function
// pointer relies on a GCC extension; confirm before building with another compiler.
int main() {
// Plain free function
{
std::cout << "normal func" << std::endl;
typedef int (*add_fn)(int, int);
add_fn fn = (add_fn)add;
std::cout << (void *)fn << " " << add(100, 200) << " " << fn(100, 200) << std::endl;
}
// Static member function: has the same pointer type as a free function
{
std::cout << "class static func" << std::endl;
typedef int (*add_fn)(int, int);
add_fn fn = (add_fn)Base::add;
std::cout << (void *)fn << " " << Base::add(100, 200) << " " << fn(100, 200) << std::endl;
}
// Member function (non-virtual), held as a pointer-to-member-function
{
std::cout << "class non-virtual func 1" << std::endl;
typedef int (Base::*add_fn)(int, int);
add_fn fn = (add_fn)&Base::add1;
Base b(100);
std::cout << (void *)fn << " " << b.add1(100, 200) << " " << (b.*fn)(100, 200) << std::endl;
}
// Same member function, converted to a plain function pointer that takes the object
// pointer as an explicit first argument
{
std::cout << "class non-virtual func 2" << std::endl;
typedef int (*add_fn)(void *, int, int);
add_fn fn = (add_fn)&Base::add1;
Base b(100);
std::cout << (void *)fn << " " << b.add1(100, 200) << " " << fn(&b, 100, 200) << std::endl;
}
// Member function (virtual), taken by function name. In the recorded output the
// pointer-to-member variable prints as 0x1 rather than as a code address.
{
std::cout << "class virtual func 1" << std::endl;
typedef int (Base::*add_fn)(int, int);
add_fn fn = (add_fn)&Base::add2;
Base b(200);
std::cout << (void *)(&Base::add2) << " " << (void *)fn << " " << b.add2(100, 200) << " " << (b.*fn)(100, 200)
<< std::endl;
}
// Member function (virtual), fetched from the vtable. Slot 0 is add2 because it is the
// first virtual function declared in Base.
{
std::cout << "class virtual func 2" << std::endl;
typedef int (*add_fn)(void *, int, int);
Base b(200);
add_fn fn = (add_fn)get_func(&b, 0);
std::cout << (void *)fn << " " << b.add2(100, 200) << " " << fn(&b, 100, 200) << std::endl;
}
}
// Output:
// normal func
// 0x400af6 300 300
// class static func
// 0x40116f 300 300
// class non-virtual func 1
// 0x40112c 400 400
// class non-virtual func 2
// 0x40112c 400 400
// class virtual func 1
// 0x40114e 0x1 500 500
// class virtual func 2
// 0x40114e 500 500