# 从字节码角度看storage、memory、calldata的拷贝 **Published by:** [rbtree](https://paragraph.com/@rbtree/) **Published on:** 2022-09-20 **URL:** https://paragraph.com/@rbtree/storage-memory-calldata ## Content 本文从EVM操作码的角度,研究storage、memory、calldata的数据拷贝。 solidity version = 0.8.15,optimizer = false,evm_version = "london" 在线反汇编 https://ethervm.io/decompile 由于字节码阅读起来较为麻烦,本文主要分析反汇编后的代码。 有兴趣的同学可以自行逐行研究evm操作码。1 storage→storagecontract Test { uint256[3] storage_numbers1; uint256[3] storage_numbers2; function test() public { storage_numbers1 = storage_numbers2; } } 反汇编代码如下,test函数大致对应func_0035(), func_0035()准备了三个参数:目标起始slot(0x00,即storage_numbers1)、源起始slot(0x03,即storage_numbers2)和数组长度(0x03), 然后进入func_0048. 我们主要关注func_0048里的storage[temp5] = storage[temp4]; 这其实是在一个goto循环里,会把数组storage_numbers2中的每一个元素复制到storage_numbers1中。 可见,storage之间的赋值是深拷贝。contract Contract { function main() { ...... func_0035(); stop(); } function func_0035() { var var0 = 0x45; var var1 = 0x00; var var2 = 0x03; var var3 = 0x03; var0 = func_0048(var1, var2, var3); } function func_0048(var arg0, var arg1, var arg2) returns (var r0) { var var0 = arg0; var temp0 = arg1; arg1 = var0 + 0x03; var var1 = temp0; if (!arg2) { ...... } else { var temp2 = arg2; var temp3 = var1; arg2 = temp3; var1 = arg2 + temp2; if (var1 <= arg2) { label_0070: goto label_0071; } else { label_005F: var temp4 = arg2; var temp5 = var0; storage[temp5] = storage[temp4]; arg2 = temp4 + 0x01; var1 = var1; var0 = temp5 + 0x01; if (var1 <= arg2) { goto label_0070; } else { goto label_005F; } } } } } 2 storage/local storage→local storagecontract Test { uint256[3] storage_numbers1; uint256 n; function test() public { uint256[3] storage local1 = storage_numbers1; uint256[3] storage local2 = local1; // add this to avoid being opt out n = local2[1]; } } 反汇编代码如下. 在反汇编代码里看不到storage复制的痕迹。 solidity的两行代码对应func_0035()中的: var var1 = var0; var var2 = var1; local storage相当于storage的指针(storage的slot编号),storage/local storage → local storage只是指针的赋值,显然属于浅拷贝。contract Contract { function main() { ......
func_0035(); stop(); } function func_0035() { var var0 = 0x00; var var1 = var0; var var2 = var1; var var3 = 0x01; if (var3 < 0x03) { storage[0x03] = storage[var3 + var2]; return; } else { .... } } } 3 local storage→storagecontract Test { uint256[3] storage_numbers; function test() public { uint256[3] storage local = storage_numbers; storage_numbers = local; } } 反汇编代码如下。 主要的拷贝逻辑在func_004A,我们看到赋值语句storage[temp4] = storage[temp3];在一个goto循环里,说明这个函数会拷贝数组中的每一个值。 显然,local storage→storage是深拷贝,过程和storage→storage类似。contract Contract { function main() { ...... func_0035(); stop(); } function func_0035() { var var0 = 0x00; var var1 = 0x46; var var2 = 0x00; var var3 = var0; var var4 = 0x03; var1 = func_004A(var2, var3, var4); } function func_004A(var arg0, var arg1, var arg2) returns (var r0) { var var0 = arg0; var var1 = arg1; arg1 = var0 + 0x03; if (!arg2) { label_0073: ...... } else { var temp1 = arg2; var temp2 = var1; arg2 = temp2; var1 = arg2 + temp1; if (var1 <= arg2) { label_0072: goto label_0073; } else { label_0061: var temp3 = arg2; var temp4 = var0; storage[temp4] = storage[temp3]; arg2 = temp3 + 0x01; var0 = temp4 + 0x01; var1 = var1; if (var1 <= arg2) { goto label_0072; } else { goto label_0061; } } } } } 4 storage/local storage→memorycontract Test { uint256[3] storage_numbers; uint256 n; function test() public { uint256[3] storage local = storage_numbers; uint256[3] memory m1 = local; // add this to avoid being opt out n = m1[1]; } } 反汇编代码如下. 复制对应代码为memory[temp5:temp5 + 0x20] = storage[temp4];这个语句也在一个goto循环中,storage中的每一个元素都会被复制到memory中。 可见storage/local storage→memory是深拷贝。contract Contract { function main() { ...... func_0035(); stop(); } function func_0035() { var var0 = 0x00; var var1 = var0; var temp0 = memory[0x40:0x60]; memory[0x40:0x60] = temp0 + 0x20 * 0x03; var var2 = temp0; var var3 = var1; var var5 = var2; var var4 = 0x03; var var6 = var3; var var7 = 0x03; if (!var7) { label_006E: ...... 
} else { var temp1 = var5; var temp2 = temp1 + var7 * 0x20; var5 = temp2; var temp3 = var6; memory[temp1:temp1 + 0x20] = storage[temp3]; var7 = temp1 + 0x20; var6 = temp3 + 0x01; if (var5 <= var7) { goto label_006E; } label_005B: var temp4 = var6; var temp5 = var7; memory[temp5:temp5 + 0x20] = storage[temp4]; var7 = temp5 + 0x20; var6 = temp4 + 0x01; if (var5 > var7) { goto label_005B; } else { goto label_006E; } } } } 5 memory→memorycontract Test { // uint256[3] storage_numbers; uint256 n; function test() public { uint256[3] memory m1; uint256[3] memory m2 = m1; // add this to avoid being opt out n = m2[1]; } } 反汇编代码如下: func_0060()函数是uint256[3]所对应的空间的申请,返回值表示solidity中的m1,实际上m1就是一个memory的指针。 var var2 = var1;对应solidity中的uint256[3] memory m2 = m1; 可见这里并没有复制数组的每一个元素,而仅仅是赋值了指针本身。 所以memory→memory是浅拷贝。contract Contract { function main() { ...... func_0035(); stop(); } function func_0035() { var var0 = 0x3b; var0 = func_0060(); var var1 = var0; var var2 = var1; var var3 = 0x01; if (var3 < 0x03) { storage[0x00] = memory[var3 * 0x20 + var2:var3 * 0x20 + var2 + 0x20]; return; } else { ...... } } function func_0060() returns (var r0) { var temp0 = memory[0x40:0x60]; memory[0x40:0x60] = temp0 + 0x60; memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20]; return temp0; } } 6 memory→storage/local storagecontract Test { uint256[3] storage_numbers; function test() public { uint256[3] storage local = storage_numbers; uint256[3] memory m; storage_numbers = m; //local = m; // compile failed } } memory→local storage的赋值无法通过编译: Type uint256[3] memory is not implicitly convertible to expected type uint256[3] storage pointer. 我们来看memory→storage的情况。 function func_0051()是为m在memory内申请空间。主要的赋值代码在func_0073。 我们看到storage[temp5] = memory[temp4:temp4 + 0x20];这个赋值语句在一个goto循环里。所以,m的每一个元素都会被复制到storage_numbers之中。 可见memory→storage是深拷贝。contract Contract { function main() { ......
func_0035(); stop(); } function func_0035() { var var0 = 0x00; var var1 = 0x3d; var1 = func_0051(); var var2 = 0x4c; var var3 = 0x00; var var4 = var1; var var5 = 0x03; var2 = func_0073(var3, var4, var5); } function func_0051() returns (var r0) { var temp0 = memory[0x40:0x60]; memory[0x40:0x60] = temp0 + 0x60; memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20]; return temp0; } function func_0073(var arg0, var arg1, var arg2) returns (var r0) { var var0 = arg0; var temp0 = arg1; arg1 = var0 + 0x03; var var1 = temp0; if (!arg2) { label_009F: ...... } else { var temp2 = arg2; var temp3 = var1; arg2 = temp3; var1 = arg2 + temp2 * 0x20; if (var1 <= arg2) { label_009E: goto label_009F; } else { label_008D: var temp4 = arg2; var temp5 = var0; storage[temp5] = memory[temp4:temp4 + 0x20]; arg2 = temp4 + 0x20; var1 = var1; var0 = temp5 + 0x01; if (var1 <= arg2) { goto label_009E; } else { goto label_008D; } } } } } 7 calldata→storage/local storagecontract Test { uint256[3] storage_numbers; function test(uint256[3] calldata data) public { uint256[3] storage local = storage_numbers; storage_numbers = data; //local = data; // compile failed } } calldata→local storage的赋值无法通过编译: Type uint256[3] calldata is not implicitly convertible to expected type uint256[3] storage pointer. 我们来看calldata→storage的情况。 主要关注func_0064,我们可以看到storage[temp5] = msg.data[temp4:temp4 + 0x20];在一个goto循环之中,说明calldata数组中的每个元素都会被复制到storage中。 可见calldata→storage是深拷贝。contract Contract { function main() { ...... func_0045(var2); stop(); } function func_0045(var arg0) { var var0 = 0x00; var var1 = 0x005f; var var2 = 0x00; var var3 = arg0; var var4 = 0x03; var1 = func_0064(var2, var3, var4); } function func_0064(var arg0, var arg1, var arg2) returns (var r0) { var var0 = arg0; var temp0 = arg1; arg1 = var0 + 0x03; var var1 = temp0; if (!arg2) { label_0093: ...... 
} else { var temp2 = arg2; var temp3 = var1; arg2 = temp3; var1 = arg2 + temp2 * 0x20; if (var1 <= arg2) { label_0092: goto label_0093; } else { label_0080: var temp4 = arg2; var temp5 = var0; storage[temp5] = msg.data[temp4:temp4 + 0x20]; arg2 = temp4 + 0x20; var0 = temp5 + 0x01; var1 = var1; if (var1 <= arg2) { goto label_0092; } else { goto label_0080; } } } } } 8 calldata→memorycontract Test { uint256 n; function test(uint256[3] calldata data) public { uint256[3] memory m; m = data; // add this to avoid being opt out n = m[1]; } } 反汇编代码如下, 我们看到func_0045中有这样一个语句: memory[temp0:temp0 + 0x20 * 0x03] = msg.data[arg0:arg0 + 0x20 * 0x03]; 0x20是uint256的字节数,0x03表示数组有3个元素。显然这句话是把整个数组都从calldata复制到了memory中。 可见calldata→memory是深拷贝。contract Contract { function main() { ...... var2 = func_00FF(var3, var4); func_0045(var2); stop(); } function func_0045(var arg0) { var var0 = 0x0054; var0 = func_00B1(); var temp0 = memory[0x40:0x60]; memory[0x40:0x60] = temp0 + 0x20 * 0x03; memory[temp0:temp0 + 0x20 * 0x03] = msg.data[arg0:arg0 + 0x20 * 0x03]; memory[temp0 + 0x20 * 0x03:temp0 + 0x20 * 0x03 + 0x20] = 0x00; var0 = temp0; var var1 = var0; var var2 = 0x01; if (var2 < 0x03) { storage[0x00] = memory[var2 * 0x20 + var1:var2 * 0x20 + var1 + 0x20]; return; } else { var var3 = 0x00a1; memory[0x00:0x20] = 0x4e487b7100000000000000000000000000000000000000000000000000000000; memory[0x04:0x24] = 0x32; revert(memory[0x00:0x24]); } } function func_00B1() returns (var r0) { var temp0 = memory[0x40:0x60]; memory[0x40:0x60] = temp0 + 0x60; memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20]; return temp0; } } 9 storage/local storage/memory→calldatacontract Test { uint256[3] storage_numbers; function test(uint256[3] calldata data) public { uint256[3] memory m; uint256[3] storage local; //data = storage_numbers; // compile failed //data = local; // compile failed //data = m; // compile failed } } storage、local storage、memory→calldata都无法通过编译。 Type uint256[3] 
storage ref is not implicitly convertible to expected type uint256[3] calldata. Type uint256[3] storage pointer is not implicitly convertible to expected type uint256[3] calldata. Type uint256[3] memory is not implicitly convertible to expected type uint256[3] calldata. 在solidity的官方文档中,我们可以看到calldata是不可修改的: Calldata is a non-modifiable, non-persistent area where function arguments are stored, and behaves mostly like memory. https://docs.soliditylang.org/en/latest/types.html#data-location-and-assignment-behaviour 10 calldata→calldatacontract Test { uint256 n; function test(uint256[3] calldata data1, uint256[3] calldata data2) public { data1 = data2; // add this to avoid being opt out n = data1[1]; //data1[1] = 100; // compile failed } } 我原以为这段代码会无法通过编译,让我意外的是居然编译成功了。 data1 = data2;应该是对应func_003F中的arg0 = arg1;(紧随其后的var var0 = arg0;只是为读取data1[1]准备基址)。 这个地方显然是一个浅拷贝,calldata本身也可以看作一个指针。 在data1=data2之后,对data1的读操作都相当于对data2的读操作。 这并没有违背calldata数据不可更改的规则。 像data1[1] = 100; 这样的语句,会真正修改calldata,这是不允许的,会编译报错: TypeError: Calldata arrays are read-only. contract Contract { function main() { ......
var2, var3 = func_0091(var3, var4); func_003F(var2, var3); stop(); } function func_003F(var arg0, var arg1) { arg0 = arg1; var var0 = arg0; var var1 = 0x01; if (var1 < 0x03) { storage[0x00] = msg.data[var1 * 0x20 + var0:var1 * 0x20 + var0 + 0x20]; return; } else { var var2 = 0x58; memory[0x00:0x20] = 0x4e487b7100000000000000000000000000000000000000000000000000000000; memory[0x04:0x24] = 0x32; revert(memory[0x00:0x24]); } } function func_0072(var arg0, var arg1) returns (var r0) { var var0 = arg1; if (var0 + 0x03 * 0x20 <= arg0) { return var0; } var var1 = 0x8a; revert(memory[0x00:0x00]); } function func_0091(var arg0, var arg1) returns (var r0, var arg0) { var var0 = 0x00; var var1 = var0; if (arg0 - arg1 i>= 0xc0) { var var2 = 0x00; var var3 = 0xb1; var var4 = arg0; var var5 = arg1 + var2; var3 = func_0072(var4, var5); var0 = var3; var2 = 0x60; var3 = 0xc0; var4 = arg0; var5 = arg1 + var2; var3 = func_0072(var4, var5); arg0 = var3; r0 = var0; return r0, arg0; } else { var2 = 0xa4; revert(memory[0x00:0x00]); } } } 总结 ## Publication Information - [rbtree](https://paragraph.com/@rbtree/): Publication homepage - [All Posts](https://paragraph.com/@rbtree/): More posts from this publication - [RSS Feed](https://api.paragraph.com/blogs/rss/@rbtree): Subscribe to updates