本文以REE驱动的加载为例,描述REE侧驱动与TEE侧驱动通信的过程
驱动的加载和初始化可以参考https://blog.csdn.net/shuaifengyun/article/details/72934531。已经做出较为详细的说明,这里对于其中一些更细节的内容做出一些记录,仅供学习参考。
文中提到了optee_probe是建立optee驱动的最后一步,其中前半部分的操作大多形如下:
invoke_fn = get_invoke_func(np);
if (IS_ERR(invoke_fn))
return (void *)invoke_fn;
if (!optee_msg_api_uid_is_optee_api(invoke_fn)) {
pr_warn("api uid mismatch\n");
return ERR_PTR(-EINVAL);
}
if (!optee_msg_api_revision_is_compatible(invoke_fn)) {
pr_warn("api revision mismatch\n");
return ERR_PTR(-EINVAL);
}
if (!optee_msg_exchange_capabilities(invoke_fn, &sec_caps)) {
pr_warn("capabilities mismatch\n");
return ERR_PTR(-EINVAL);
}
---------------------------------------------------------------------
/*
 * Issue the OPTEE_SMC_CALLS_UID call through @invoke_fn and compare the
 * four result words with the OP-TEE message UID.
 *
 * @invoke_fn: function used to perform the call into the secure world.
 *
 * Returns true only when a0..a3 of the result equal OPTEE_MSG_UID_0..
 * OPTEE_MSG_UID_3 respectively, false otherwise.
 */
static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn)
{
	struct arm_smccc_res uid;

	invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &uid);

	/* Reject on the first word that does not match. */
	if (uid.a0 != OPTEE_MSG_UID_0)
		return false;
	if (uid.a1 != OPTEE_MSG_UID_1)
		return false;
	if (uid.a2 != OPTEE_MSG_UID_2)
		return false;

	return uid.a3 == OPTEE_MSG_UID_3;
}
可以看到,首先获取到一个Invoke_fn的函数,然后后面都是调用这个函数与optee进行通信,获取到optee的系统信息,下面就对这个函数如何与optee进行通信做出说明。
Invoke_fn函数实际上获取的是optee_smccc_smc函数,函数实现如下
/*
* Wrap c macros in asm macros to delay expansion until after the
* SMCCC asm macro is expanded.
*/
/* SMCCC_SMC asm macro: expands to the C macro __SMC(0), which issues the smc. */
.macro SMCCC_SMC
__SMC(0)
.endm
/* SMCCC_HVC asm macro: expands to the C macro __HVC(0), which issues the hvc. */
.macro SMCCC_HVC
__HVC(0)
.endm
/*
 * SMCCC: shared body of the SMC/HVC call wrappers.
 *   \instr - asm macro that emits the trapping instruction
 *            (SMCCC_SMC or SMCCC_HVC).
 *
 * Expected state on entry (nine word-sized arguments, 32-bit ARM call):
 *   r0-r3            = a0-a3
 *   [sp] .. [sp,#12] = a4-a7
 *   [sp,#16]         = res
 * On exit the r0-r3 values left by \instr have been stored to the four
 * words at res. r4-r7 are saved and restored; r12 is used as scratch.
 */
	.macro SMCCC instr
UNWIND(	.fnstart)
	mov	r12, sp			/* r12 = sp at entry, i.e. address of a4 */
	push	{r4-r7}			/* save r4-r7; sp = sp - 4 * 4 */
UNWIND(	.save	{r4-r7})
	ldm	r12, {r4-r7}		/* r4-r7 = a4-a7 taken from the caller's stack */
	\instr				/* issue the smc/hvc */
	pop	{r4-r7}			/* restore r4-r7; sp is back at its entry value */
	ldr	r12, [sp, #(4 * 4)]	/* r12 = res, the word that follows a4-a7 */
	stm	r12, {r0-r3}		/* store r0-r3 to the four words at res */
	bx	lr
UNWIND(	.fnend)
	.endm
/*
 * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
 * unsigned long a3, unsigned long a4, unsigned long a5,
 * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
 *
 * Entry point built from the SMCCC macro with SMCCC_SMC as the trapping
 * instruction.
 */
ENTRY(arm_smccc_smc)
SMCCC SMCCC_SMC
ENDPROC(arm_smccc_smc)
由于例子中是arm32的平台,因此调用约定满足ATPCS调用约定。由于smccc_smc函数的入参有9个参数,按照约定,前4个参数存在r0 - r3,其他参数从右向左入栈。因此在执行instr前的一些操作后,寄存器和栈空间分布如下。
res |
|
a7 | |
a6 | |
a5 | |
a4 | <- r12 |
r7 | |
r6 | |
r5 | |
r4 | <- sp |
r0=a0, r1=a1, r2=a2, r3=a3, r4=a4, r5=a5, r6=a6, r7=a7
instr的指令是smc,在执行的时候,arm会产生SMC(Secure Monitor Call)异常并进入monitor模式,然后在monitor模式的异常向量表中找到smc对应的向量并跳转。这部分的介绍可以参考https://blog.csdn.net/shuaifengyun/article/details/72794556
其中需要特别说明执行sm_smc_entry函数时的栈空间分布。sm_smc_entry函数实现如下
/*
 * sm_smc_entry (excerpt showing the entry from the non-secure world).
 *
 * Saves the caller's return state and r0-r7 on the monitor stack, checks
 * SCR.NS to find out which world issued the SMC, and for a non-secure
 * caller hands the context to sm_from_nsec(). Depending on its result the
 * code either continues into the secure world or branches to
 * .sm_ret_to_nsec.
 */
LOCAL_FUNC sm_smc_entry , :
UNWIND( .fnstart)
UNWIND( .cantunwind)
srsdb sp!, #CPSR_MODE_MON // store lr and spsr of the current mode on the monitor-mode stack
push {r0-r7} // push r0-r7 right below them
clrex /* Clear the exclusive monitor */
/* Find out if we're doing an secure or non-secure entry */
read_scr r1 // r1 = current SCR value
tst r1, #SCR_NS // test the NS bit; the Z flag is cleared when NS is set
bne .smc_from_nsec // NS set: the request came from the non-secure world
// NOTE(review): the handling of an entry from the secure world belongs here;
// it is not reproduced in this excerpt.
.smc_from_nsec:
/*
 * As we're coming from non-secure world (NS bit set) the stack
 * pointer points to sm_ctx.nsec.r0 at this stage. After the
 * instruction below the stack pointer points to sm_ctx.
 */
// Subtracting the offset of nsec.r0 inside sm_ctx turns sp into the
// address of the sm_ctx structure itself.
sub sp, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R0)
// Clear the NS and FIQ bits in the SCR copy held in r1.
bic r1, r1, #(SCR_NS | SCR_FIQ) /* Clear NS and FIQ bit in SCR */
write_scr r1 // write the modified value back to SCR
// r0 = address of sm_ctx.nsec.r8
add r0, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R8)
stm r0, {r8-r12} // save r8-r12 into sm_ctx.nsec.r8 .. sm_ctx.nsec.r12
mov r0, sp // r0 = sm_ctx, the argument of sm_from_nsec
bl sm_from_nsec // let sm_from_nsec process the non-secure SMC request
cmp r0, #0 // did sm_from_nsec return false (0)?
beq .sm_ret_to_nsec // false: go back to the non-secure world
/*
 * Continue into secure world
 */
// sm_from_nsec returned true: continue in the secure world.
// Move sp to sm_ctx.sec.r0.
add sp, sp, #(SM_CTX_SEC + SM_SEC_CTX_R0)
.sm_exit:
pop {r0-r7} // r0-r7 = sm_ctx.sec.r0 .. r7, which sm_from_nsec filled from the non-secure r0-r7
rfefd sp! // leave monitor mode: pc and cpsr are loaded from the next two words (mon_lr, mon_spsr)
UNWIND( .fnend)
END_FUNC sm_smc_entry
在跳转到sm_from_nsec之前,栈空间和寄存器值如下:
spsr | |
lr | |
r7 | |
r6 | |
r5 | |
r4 | |
r3 | |
r2 | |
r1 | |
r0 | |
r12 | |
r11 | |
r10 | |
r9 | |
r8 | |
... | 长度SM_CTX_NSEC + SM_NSEC_CTX_R8 |
<- sp |
r0=sp, r1=SCR寄存器的值, 其他寄存器的值不用关注因为已经入栈。
通过注释可以知道,目前SP指向的是sm_ctx的起始地址,是通过sub sp, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R0)得到的。sp从NSW过来以后(经过srsdb和push {r0-r7}之后)指向的正好是sm_ctx结构体中的nsec.r0,即sp=&sm_ctx.nsec.r0,想来也很是神奇。因此直接用sp减去该成员在sm_ctx中的偏移量即可算出sm_ctx的地址。sm_ctx结构体如下
/*
 * Per-mode register slots (sp/lr and, where present, spsr) for the usr,
 * irq, fiq, svc, abt and und modes. One instance is embedded at the start
 * of both the secure and the non-secure context.
 */
struct sm_mode_regs {
uint32_t usr_sp;
uint32_t usr_lr;
uint32_t irq_spsr;
uint32_t irq_sp;
uint32_t irq_lr;
uint32_t fiq_spsr;
uint32_t fiq_sp;
uint32_t fiq_lr;
/*
 * Note that fiq_r{8-12} are not saved here. Instead thread_fiq_handler
 * preserves r{8-12}.
 */
uint32_t svc_spsr;
uint32_t svc_sp;
uint32_t svc_lr;
uint32_t abt_spsr;
uint32_t abt_sp;
uint32_t abt_lr;
uint32_t und_spsr;
uint32_t und_sp;
uint32_t und_lr;
};
/*
 * Non-secure world context kept by the monitor.
 *
 * The tail of the structure (r0..r7, mon_lr, mon_spsr) is laid out in the
 * order in which sm_smc_entry stores it: "srsdb" writes the return state
 * and "push {r0-r7}" writes r0-r7 directly below it. r8-r12 are stored
 * separately and therefore sit in front of r0.
 */
struct sm_nsec_ctx {
struct sm_mode_regs mode_regs;
uint32_t r8;
uint32_t r9;
uint32_t r10;
uint32_t r11;
uint32_t r12;
uint32_t r0;
uint32_t r1;
uint32_t r2;
uint32_t r3;
uint32_t r4;
uint32_t r5;
uint32_t r6;
uint32_t r7;
/* return state */
uint32_t mon_lr;
uint32_t mon_spsr;
};
/*
 * Secure world context kept by the monitor: the per-mode registers,
 * r0-r7 and the return state (mon_lr, mon_spsr) that "pop {r0-r7}"
 * followed by "rfefd sp!" consume when the monitor exits.
 */
struct sm_sec_ctx {
struct sm_mode_regs mode_regs;
uint32_t r0;
uint32_t r1;
uint32_t r2;
uint32_t r3;
uint32_t r4;
uint32_t r5;
uint32_t r6;
uint32_t r7;
/* return state */
uint32_t mon_lr;
uint32_t mon_spsr;
};
/*
 * Complete monitor context: one padding word followed by the secure and
 * the non-secure context. The assembly addresses members through offsets
 * such as SM_CTX_SEC and SM_CTX_NSEC, so the member order must not change.
 */
struct sm_ctx {
uint32_t pad;
struct sm_sec_ctx sec;
struct sm_nsec_ctx nsec;
};
然后会调用sm_from_nsec函数,参数是sm_ctx*。下面是sm_from_nsec 函数实现
/*
 * Handle an SMC issued by the non-secure world.
 *
 * @ctx: monitor context; ctx->nsec holds the registers saved on entry.
 *
 * Returns false when execution must go back to the non-secure world
 * (the platform handler said so, or a standard-owner call was served
 * here), and true when execution continues in the secure world. In the
 * latter case the non-secure r0-r7 have been copied into the secure
 * context and sec.mon_lr points at the fast or standard SMC entry.
 */
bool sm_from_nsec(struct sm_ctx *ctx)
{
	uint32_t *ns_args = (uint32_t *)(&ctx->nsec.r0);

	if (!sm_platform_handler(ctx))
		return false;

#ifdef CFG_PSCI_ARM32
	if (OPTEE_SMC_OWNER_NUM(*ns_args) == OPTEE_SMC_OWNER_STANDARD) {
		smc_std_handler((struct thread_smc_args *)ns_args, &ctx->nsec);
		return false;	/* Return to non secure state */
	}
#endif

	/* Swap the per-mode registers: save non-secure, restore secure. */
	sm_save_modes_regs(&ctx->nsec.mode_regs);
	sm_restore_modes_regs(&ctx->sec.mode_regs);

	/* Hand the eight argument words r0-r7 over to the secure context. */
	memcpy(&ctx->sec.r0, ns_args, sizeof(uint32_t) * 8);

	/* Pick the secure-world entry the monitor will return to. */
	ctx->sec.mon_lr = OPTEE_SMC_IS_FAST_CALL(ctx->sec.r0) ?
		(uint32_t)&thread_vector_table.fast_smc_entry :
		(uint32_t)&thread_vector_table.std_smc_entry;

	return true;	/* return into secure state */
}
然后是在sm_ctx结构体中把nsec的r0-r7的值copy到了sec的r0-r7
然后是根据r0的值判断是否为fast_smc模式。加载驱动时调用的都是fast模式。下面举例:
/*
 * Function ID of an OP-TEE fast call: 32-bit fast call owned by the
 * Trusted OS, with @func_num as the function number.
 */
#define OPTEE_SMC_FAST_CALL_VAL(func_num) \
	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
			   ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))

/*
 * Generic query calls. All three are 32-bit fast calls in the
 * ARM_SMCCC_OWNER_TRUSTED_OS_END owner range and are spelled with the
 * same ARM_SMCCC_* names so that they stay consistent with each other.
 */
#define OPTEE_SMC_CALLS_COUNT \
	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
			   ARM_SMCCC_OWNER_TRUSTED_OS_END, \
			   OPTEE_SMC_FUNCID_CALLS_COUNT)

#define OPTEE_SMC_CALLS_UID \
	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
			   ARM_SMCCC_OWNER_TRUSTED_OS_END, \
			   OPTEE_SMC_FUNCID_CALLS_UID)

#define OPTEE_SMC_CALLS_REVISION \
	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
			   ARM_SMCCC_OWNER_TRUSTED_OS_END, \
			   OPTEE_SMC_FUNCID_CALLS_REVISION)
ctx->sec.mon_lr被赋值为fast_smc_entry,其实是vector_fast_smc_entry。位于optee_os/core/arch/arm/kernel/thread_a32.S
/*
 * vector_fast_smc_entry: secure-world entry for fast SMCs.
 *
 * Places r0-r7 on the stack as eight consecutive words, passes their
 * address to thread_handle_fast_smc, then reloads the (possibly updated)
 * words into r1-r8 and issues an smc with r0 set to
 * TEESMC_OPTEED_RETURN_CALL_DONE.
 */
LOCAL_FUNC vector_fast_smc_entry , :
UNWIND( .fnstart)
UNWIND( .cantunwind)
push {r0-r7} /* r0-r7 become eight consecutive words on the stack */
mov r0, sp /* r0 = address of those words, the argument of the call below */
bl thread_handle_fast_smc
pop {r1-r8} /* reload the eight words shifted up by one register */
ldr r0, =TEESMC_OPTEED_RETURN_CALL_DONE /* r0 = code passed to the monitor */
smc #0
b . /* SMC should not return */
UNWIND( .fnend)
END_FUNC vector_fast_smc_entry
sm_from_nsec被调用完成后,如果返回true,则进入SW继续执行。此时r0-r7都被恢复回了NSW传入的a0-a7参数,sm_smc_entry函数退出时,利用rfe指令退出了中断,即退出了monitor模式,进入SW,跳转到vector_fast_smc_entry函数。经过vector_fast_smc_entry函数中push操作,r0-r7寄存器又被重新推入栈,并且栈顶被当作参数传入了thread_handle_fast_smc。
thread_handle_fast_smc实际调用的是tee_entry_fast,位于optee_os/core/arch/arm/tee/entry_fast.c
/*
 * Eight 32-bit SMC argument/result words a0-a7. The fast SMC entry builds
 * this block by pushing r0-r7 and passes its address to the handler; the
 * handler writes its results back into the same words.
 */
struct thread_smc_args {
uint32_t a0; /* SMC function ID */
uint32_t a1; /* Parameter */
uint32_t a2; /* Parameter */
uint32_t a3; /* Thread ID when returning from RPC */
uint32_t a4; /* Not used */
uint32_t a5; /* Not used */
uint32_t a6; /* Not used */
uint32_t a7; /* Hypervisor Client ID */
};
/*
 * Dispatch a fast SMC to its handler.
 *
 * @args: SMC argument block; args->a0 selects the function. Each handler
 *        receives the same block. For an unrecognised function ID,
 *        args->a0 is set to OPTEE_SMC_RETURN_UNKNOWN_FUNCTION.
 */
void tee_entry_fast(struct thread_smc_args *args)
{
	switch (args->a0) {

	/* Generic functions */
	case OPTEE_SMC_CALLS_COUNT:
		tee_entry_get_api_call_count(args);
		break;

	case OPTEE_SMC_CALLS_UID:
		tee_entry_get_api_uuid(args);
		break;

	case OPTEE_SMC_CALLS_REVISION:
		tee_entry_get_api_revision(args);
		break;

	case OPTEE_SMC_CALL_GET_OS_UUID:
		tee_entry_get_os_uuid(args);
		break;

	case OPTEE_SMC_CALL_GET_OS_REVISION:
		tee_entry_get_os_revision(args);
		break;

	/* OP-TEE specific SMC functions */
	case OPTEE_SMC_GET_SHM_CONFIG:
		tee_entry_get_shm_config(args);
		break;

	case OPTEE_SMC_L2CC_MUTEX:
		tee_entry_fastcall_l2cc_mutex(args);
		break;

	case OPTEE_SMC_EXCHANGE_CAPABILITIES:
		tee_entry_exchange_capabilities(args);
		break;

	case OPTEE_SMC_DISABLE_SHM_CACHE:
		tee_entry_disable_shm_cache(args);
		break;

	case OPTEE_SMC_ENABLE_SHM_CACHE:
		tee_entry_enable_shm_cache(args);
		break;

	case OPTEE_SMC_BOOT_SECONDARY:
		tee_entry_boot_secondary(args);
		break;

	/* Anything else is reported back through a0. */
	default:
		args->a0 = OPTEE_SMC_RETURN_UNKNOWN_FUNCTION;
		break;
	}
}
tee_entry_fast函数在执行完成后会把结果放在参数args里,即vector_fast_smc_entry在调用thread_handle_fast_smc之前通过push {r0-r7}压入的那段栈空间。调用返回后,vector_fast_smc_entry通过pop {r1-r8}将结果弹出到r1-r8寄存器中,然后将r0赋值为TEESMC_OPTEED_RETURN_CALL_DONE,准备再次进入monitor模式返回NSW。下面要看sm_smc_entry函数中关于从SW进入monitor模式的处理
/*
 * sm_smc_entry (excerpt showing the entry from the secure world and the
 * return to the non-secure world).
 *
 * Saves the secure context, copies r1-r4 of the secure caller into
 * r0-r3 of the non-secure context (unless r0 is
 * TEESMC_OPTEED_RETURN_FIQ_DONE), restores the non-secure context, sets
 * SCR.NS/SCR.FIQ and leaves monitor mode through the non-secure return
 * state.
 */
LOCAL_FUNC sm_smc_entry , :
UNWIND( .fnstart)
UNWIND( .cantunwind)
srsdb sp!, #CPSR_MODE_MON // store lr and spsr of the current mode on the monitor-mode stack
push {r0-r7} // push r0-r7 right below them
clrex /* Clear the exclusive monitor */
/* Find out if we're doing an secure or non-secure entry */
read_scr r1 // r1 = current SCR value
tst r1, #SCR_NS // test the NS bit; the Z flag is cleared when NS is set
bne .smc_from_nsec // NS set: non-secure caller (that label is not part of this excerpt)
/*
 * As we're coming from secure world (NS bit cleared) the stack
 * pointer points to sm_ctx.sec.r0 at this stage. After the
 * instruction below the stack pointer points to sm_ctx.
 */
sub sp, sp, #(SM_CTX_SEC + SM_SEC_CTX_R0) // sp = address of sm_ctx
/* Save secure context */
add r0, sp, #SM_CTX_SEC // r0 = address of sm_ctx.sec
bl sm_save_modes_regs
/*
 * On FIQ exit we're restoring the non-secure context unchanged, on
 * all other exits we're shifting r1-r4 from secure context into
 * r0-r3 in non-secure context.
 */
add r8, sp, #(SM_CTX_SEC + SM_SEC_CTX_R0) // r8 = address of sm_ctx.sec.r0
ldm r8, {r0-r4} // r0-r4 = sm_ctx.sec.r0 .. sm_ctx.sec.r4 (the values pushed on entry)
mov_imm r9, TEESMC_OPTEED_RETURN_FIQ_DONE // r9 = the "FIQ done" return code
cmp r0, r9 // is r0 the FIQ code? (on the fast-call path r0 is TEESMC_OPTEED_RETURN_CALL_DONE)
addne r8, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R0) // not FIQ: r8 = address of sm_ctx.nsec.r0
stmne r8, {r1-r4} // not FIQ: store r1-r4 into sm_ctx.nsec.r0 .. sm_ctx.nsec.r3
/* Restore non-secure context */
add r0, sp, #SM_CTX_NSEC // r0 = address of sm_ctx.nsec
bl sm_restore_modes_regs // restore the non-secure per-mode registers
// Return path to the non-secure world
.sm_ret_to_nsec:
/*
 * Return to non-secure world
 */
// r0 = address of sm_ctx.nsec.r8
add r0, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R8)
ldm r0, {r8-r12} // r8-r12 = sm_ctx.nsec.r8 .. sm_ctx.nsec.r12
/* Update SCR */
read_scr r0 // r0 = current SCR value
// set the NS and FIQ bits
orr r0, r0, #(SCR_NS | SCR_FIQ) /* Set NS and FIQ bit in SCR */
write_scr r0 // write the modified value back to SCR
// sp = address of sm_ctx.nsec.r0
add sp, sp, #(SM_CTX_NSEC + SM_NSEC_CTX_R0)
b .sm_exit // continue at .sm_exit
// Common monitor exit
.sm_exit:
pop {r0-r7} // r0-r7 = the eight words at sp (sm_ctx.nsec.r0 .. r7 on this path)
rfefd sp! // leave monitor mode: pc and cpsr are loaded from the next two words (mon_lr, mon_spsr)
UNWIND( .fnend)
END_FUNC sm_smc_entry
最后sm_exit操作中首先恢复r0-r7寄存器,由于此时sp指向的是sm_ctx.nsec.r0,恢复出来的r0-r3是SW中执行得到的结果(r4-r7仍是NSW进入monitor时保存的值),同时sp指向mon_lr。下一条指令rfefd就跳转到了mon_lr指向的位置,即回到optee_smccc_smc中smc指令之后的下一条指令。我们再列一下optee_smccc_smc函数的实现。
/*
* Wrap c macros in asm macros to delay expansion until after the
* SMCCC asm macro is expanded.
*/
/* SMCCC_SMC asm macro: expands to the C macro __SMC(0), which issues the smc. */
.macro SMCCC_SMC
__SMC(0)
.endm
/* SMCCC_HVC asm macro: expands to the C macro __HVC(0), which issues the hvc. */
.macro SMCCC_HVC
__HVC(0)
.endm
/*
 * SMCCC: shared body of the SMC/HVC call wrappers.
 *   \instr - asm macro that emits the trapping instruction
 *            (SMCCC_SMC or SMCCC_HVC).
 *
 * Expected state on entry (nine word-sized arguments, 32-bit ARM call):
 *   r0-r3            = a0-a3
 *   [sp] .. [sp,#12] = a4-a7
 *   [sp,#16]         = res
 * On exit the r0-r3 values left by \instr have been stored to the four
 * words at res. r4-r7 are saved and restored; r12 is used as scratch.
 */
	.macro SMCCC instr
UNWIND(	.fnstart)
	mov	r12, sp			/* r12 = sp at entry, i.e. address of a4 */
	push	{r4-r7}			/* save r4-r7; sp = sp - 4 * 4 */
UNWIND(	.save	{r4-r7})
	ldm	r12, {r4-r7}		/* r4-r7 = a4-a7 taken from the caller's stack */
	\instr				/* issue the smc/hvc */
	pop	{r4-r7}			/* restore r4-r7; the stack top now holds a4, a5, a6, a7, res */
	ldr	r12, [sp, #(4 * 4)]	/* r12 = word at sp + 16, which is the res pointer */
	stm	r12, {r0-r3}		/* store r0-r3 to the four words at res */
	bx	lr			/* return to the caller */
UNWIND(	.fnend)
	.endm
/*
 * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
 * unsigned long a3, unsigned long a4, unsigned long a5,
 * unsigned long a6, unsigned long a7, struct arm_smccc_res *res)
 *
 * Entry point built from the SMCCC macro with SMCCC_SMC as the trapping
 * instruction.
 */
ENTRY(arm_smccc_smc)
SMCCC SMCCC_SMC
ENDPROC(arm_smccc_smc)
如上面注释所示,smccc_smc函数的第9个参数res所指向的内存被填入了r0-r3的值,需要注意的是32位机的long也是32位的。
r0-r3的值是tee_entry_fast函数中计算的结果,被存在了struct thread_smc_args型的args的a0, a1, a2, a3变量并被带回了NSW,最终放在了smccc_smc的结果res中。
REE侧的驱动加载和初始化时,OPTEE端已经初始化完毕,因此可以进行调用。通过以上的代码分析可以看到,NSW通过smc指令进入monitor模式,然后在monitor模式中更改SCR寄存器转入SW,同时跳转到相应的处理函数。处理结束后同样进入monitor模式更改SCR寄存器进入NSW,然后退回到NSW调用处,取到结果。
以上分析中有一处不明,即vector_fast_smc_entry函数调用完tee_entry_fast后,将a0-a7的返回值存入r1-r8寄存器,用r0做进入monitor后的function id。而在进入monitor后并没有处理r1-r8寄存器,而是将r0-r7寄存器压入栈,也就是丢弃了r8。然后在sm_smc_entry函数中把r1-r4的值存入sm_ctx.nsec.r0到sm_ctx.nsec.r3,然后再将sm_ctx.nsec.r0到sm_ctx.nsec.r3恢复到NSW中的r0-r3。也就是说并不关心SW时期赋值的r5-r8的值(即返回值a4-a7),其中r8在压栈时就已被丢弃。后续需要进一步研究