Zain Lau

FastAPI部署中国最强通用语言大模型Baichuan2

首先，需要安装 FastAPI 和 Uvicorn（用于运行 FastAPI 应用程序）：

pip install fastapi uvicorn

使用 FastAPI 框架：

from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
import json
import mindspore as ms
from mindformers import AutoConfig, AutoModel, AutoTokenizer

app = FastAPI()

ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=0)

config = AutoConfig.from_pretrained("baichuan2_6b")
config.checkpoint_name_or_path = "../mindformers/checkpoint_download/baichuan2baichuan2_6b.ckpt"
model = AutoModel.from_config(config)
tokenizer = AutoTokenizer.from_pretrained("baichuan2_6b")

class RequestData(BaseModel):
    text: str

@app.post('/baichuan2_bot')
async def say_hello_func(data: RequestData):
    print("----------- in hello func ----------")
    text = data.text
    inputs = tokenizer(tokenizer.build_prompt(text))["input_ids"]
    print(tokenizer.decode(inputs))
    outputs = model.generate(inputs, max_length=14096)
    outputs_text = tokenizer.decode(outputs)
    return {"response": outputs_text}

@app.get('/goodbye')
async def say_goodbye_func():
    print("----------- in goodbye func ----------")
    return {'message': 'Goodbye!'}

@app.post('/')
async def default_func(request: Request):
    print("----------- in default func ----------")
    data = await request.json()
    return {'message': 'Called default func!', 'data': data}

if __name__ == '__main__':
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)

百川大模型

"""Baichuan2_13b models' APIs."""
from typing import Optional
import math
import numpy as np
import mindspore.common.dtype as mstype

try:
    from mindspore._checkparam import Validator
except ImportError:
    import mindspore._checkparam as Validator
from mindspore import Tensor, nn, ops
from mindspore.common.parameter import Parameter
from mindspore.ops import operations as P
from mindspore.common.initializer import initializer, HeUniform
try:
    from mindspore.nn.layer.flash_attention import FlashAttention
    FLASHATTENTION_VALID = True
except ImportError:
    FLASHATTENTION_VALID = False

from mindformers.core.loss.loss import CrossEntropyLoss
from mindformers.mindformer_book import MindFormerBook
from mindformers.models.base_model import BaseModel
from mindformers.models.utils import cell_reuse
from mindformers.modules.transformer.op_parallel_config import _check_config
from mindformers.modules.transformer import AttentionMask, TransformerOpParallelConfig
from mindformers.modules.layers import Linear, _check_input_dtype, _check_past_none_input_none, AlibiTensorV2
from mindformers.tools.register.register import MindFormerModuleType, MindFormerRegister

from mindformers.models.llama.llama import layer_compute_dtype
from mindformers.models.llama.llama_config import LlamaConfig
from mindformers.models.llama.llama_layer import LlamaEmbedding, LlamaFeedForward, LlamaRMSNorm
from mindformers.tools.logger import logger

__all__ = ['Baichuan13BV2ForCausalLM', 'Baichuan13BV2Model']


@MindFormerRegister.register(MindFormerModuleType.MODELS)
class Baichuan13BV2ForCausalLM(BaseModel):
    r"""
        Provide baichuan2_13B training loss or logits through network.
        Args:
            config (LlamaConfig): The config of baichuan2_13B model.

        Inputs:
            input_ids(Tensor): the tokenized inputs with datatype int32, Tensor of shape :math:`(batch, seq\_length)`.
            labels(Tensor): the tokenized labels with datatype int32, Tensor of shape :math:`(batch, seq\_length)`.
            input_position(Tensor): current position, used by model.predict.
            position_ids(Tensor): Reserved param, not used.
            attention_mask(Tensor): Reserved param, not used.
            input_embeds(Tensor): Reserved param, not used.
            init_reset(bool, optional): A bool tensor with shape [1], used to clear the past key parameter and
              past value parameter used in the incremental prediction. Default True.
            batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental
              prediction. Tensor of shape :math:`(batch_size,)`. Default None.

        Returns:
            Tensor, the loss or logits of the network.

        Examples:
            >>> from mindformers.models.llama import LlamaConfig
            >>> from research.baichuan2.baichuan2_13b import Baichuan13BV2ForCausalLM
            >>> config = LlamaConfig(batch_size=2)
            >>> network = Baichuan13BV2ForCausalLM(config=config)
        """
    _support_list = MindFormerBook.get_model_support_list()['llama']

    @cell_reuse()
    def __init__(self, config: LlamaConfig = None):
        super(Baichuan13BV2ForCausalLM, self).__init__(config, auto_prefix=True)
        _check_config(config.parallel_config)
        self.ignore_token_id = config.ignore_token_id
        self.pad_token_id = config.pad_token_id
        self.dtype = config.compute_dtype

        self.reshape = P.Reshape()
        self.cast = P.Cast()
        self.slice = P.StridedSlice()
        self.not_equal = P.NotEqual()
        self.mul = P.Mul()
        self.add = P.Add()
        self.model = Baichuan13BV2Model(config=config)
        self.lm_head = NormHead(hidden_size=config.hidden_size,
                                vocab_size=config.vocab_size,
                                compute_dtype=config.compute_dtype)
        self.loss = CrossEntropyLoss(parallel_config=config.parallel_config)

        dp = config.parallel_config.data_parallel
        self.slice.shard(((dp, 1),))
        self.not_equal.shard(((dp, 1), ()))
        self.mul.shard(((dp, 1), (dp, 1)))
        self.add.shard(((dp, 1), ()))
        self.lm_head.shard(config.parallel_config)

        self.load_checkpoint(config)

    # pylint: disable=W0613
    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        return {
            "input_ids": Tensor(input_ids, mstype.int32)
        }

    # pylint: disable=W0613
    def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,
                  input_embeds=None, init_reset=True, batch_valid_length=None):
        """Baichuan13BV2ForCausalLM forward."""
        bsz, seqlen = input_ids.shape
        if self.training:
            tokens = self.slice(input_ids, (0, 0), (bsz, seqlen - 1), (1, 1))
        else:
            tokens = input_ids

        output = self.model(tokens, input_position, init_reset, batch_valid_length)
        logits = self.lm_head(output)

        input_mask = self.cast(self.not_equal(tokens, self.pad_token_id), self.dtype)
        if labels is None:
            labels = self.slice(input_ids, (0, 1), (bsz, seqlen), (1, 1))
        else:
            if labels.ndim > 1:
                if self.training:
                    labels = self.slice(labels, (0, 1), (bsz, seqlen), (1, 1))
                label_mask = self.cast(self.not_equal(labels, self.ignore_token_id), self.dtype)
                input_mask = self.mul(input_mask, label_mask)

        logits = self.cast(logits, mstype.float32)
        if not self.training:
            logits = self.reshape(logits, (bsz, seqlen, -1))
            # makes cast effective to avoid allgather issue in Mindspore1.10
            input_mask = self.add(input_mask, 1)
            return logits, tokens, input_mask

        if logits.ndim > 2:
            logits = self.reshape(logits, (-1, logits.shape[-1]))
        labels = self.reshape(labels, (-1,))
        input_mask = self.reshape(input_mask, (-1,))
        loss = self.loss(logits, labels, input_mask)
        return loss


class Baichuan13BV2Model(BaseModel):
    r"""
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`Baichuan13BV2DecoderLayer`]
    Args:
        config(LlamaConfig): the config of network

    Inputs:
        input_ids: the tokenized inputs with datatype int32

    Returns:
        output: Tensor, the output of baichuan2_13b decoderlayer
    """
    _support_list = MindFormerBook.get_model_support_list()['llama']

    def __init__(self,
                 config: LlamaConfig = None):
        super().__init__(config, auto_prefix=True)
        _check_config(config.parallel_config)
        if config.batch_size or config.use_past:
            Validator.check_positive_int(config.batch_size)
        self.dtype = config.compute_dtype
        self.num_layers = config.num_layers
        self.pad_token_id = config.pad_token_id
        self.is_first_iteration = True
        self.use_past = config.use_past
        self.use_flash_attention = config.use_flash_attention and FLASHATTENTION_VALID
        if self.use_flash_attention:
            logger.info("Enable flash attention.")
        elif config.use_flash_attention:
            logger.info("Current MindSpore do not support flash attention.")

        self.get_attention_mask = AttentionMask(
            config.seq_length, parallel_config=config.parallel_config.dp_mp_config).to_float(config.compute_dtype)
        self.multiply_data = Tensor([-10000.0], dtype=config.compute_dtype)
        self.one = Tensor([1.0], dtype=config.compute_dtype)
        self.all_ones_attention_mask_alibi = P.Ones()((1, config.seq_length), self.dtype)
        self.reshape = P.Reshape()
        self.cast = P.Cast()
        self.tile = P.Tile()
        self.mul_mask = P.Mul()
        self.mul_alibi = P.Mul()
        self.sub = P.Sub()
        self.expand_dims = P.ExpandDims()
        self.not_equal = P.NotEqual()
        self.gather = P.Gather()
        self.transpose = P.Transpose()

        self.tok_embeddings = LlamaEmbedding(
            config.vocab_size, config.hidden_size, param_init_type=config.param_init_type)
        self.layers = nn.CellList()
        for layer_id in range(config.num_layers):
            layer = Baichuan13BDecodeLayer(config.batch_size,
                                           config.seq_length,
                                           layer_id,
                                           dim=config.hidden_size,
                                           n_heads=config.num_heads,
                                           multiple_of=config.multiple_of,
                                           n_kv_heads=config.n_kv_heads,
                                           ffn_dim_multiplier=config.ffn_dim_multiplier,
                                           norm_eps=config.rms_norm_eps,
                                           compute_dtype=config.compute_dtype,
                                           layernorm_compute_dtype=config.layernorm_compute_type,
                                           softmax_compute_dtype=config.softmax_compute_type,
                                           param_init_type=config.param_init_type,
                                           use_past=config.use_past,
                                           use_flash_attention=config.use_flash_attention,
                                           compute_in_2d=config.compute_in_2d,
                                           use_past_shard=config.use_past_shard,
                                           parallel_config=config.parallel_config)
            layer_compute_dtype(layer, layer_id, config.offset, config.parallel_config,
                                config.num_layers, select_recompute=config.parallel_config.recompute.select_recompute)
            self.layers.append(layer)
        self.norm_out = LlamaRMSNorm(config.hidden_size, config.rms_norm_eps,
                                     compute_type=config.layernorm_compute_type)

        self.build_alibi_tensor = AlibiTensorV2(seq_length=config.seq_length,
                                                num_heads=config.num_heads)

        dp = config.parallel_config.data_parallel
        mp = config.parallel_config.model_parallel
        self.tok_embeddings.pipeline_stage = 0
        if config.parallel_config.pipeline_stage > 1:
            self.norm_out.pipeline_stage = config.parallel_config.pipeline_stage - 1
            self.tok_embeddings.set_comm_fusion(2)
            self.norm_out.set_comm_fusion(2)
        else:
            self.tok_embeddings.set_comm_fusion(config.parallel_config.gradient_aggregation_group)
            self.norm_out.set_comm_fusion(config.parallel_config.gradient_aggregation_group)

        self.tok_embeddings.shard(config.parallel_config)
        self.build_alibi_tensor.shard(config.parallel_config)

        self.tile.shard(((1, 1, 1, 1), ()))
        self.sub.shard(((1,), (dp, 1, 1)))
        self.mul_mask.shard(((dp, 1, 1, 1), (1,)))
        self.mul_alibi.shard(((dp, mp, 1, 1), (dp, 1, 1, 1))) # (dp, mp, 1, 1)
        self.expand_dims.shard(((dp, 1, 1),))
        self.not_equal.shard(((dp, 1), ()))
        self.gather.shard(((dp, mp, 1, 1), (1,)))
        self.transpose.shard(((1, mp, dp, 1),))
        if config.compute_in_2d:
            self.norm_out.shard((dp, 1))
        else:
            self.norm_out.shard((dp, 1, 1))

        if self.use_past:
            seq_range = np.arange(config.seq_length).reshape(1, 1, -1)
            self.ones = P.Ones()
            self.range = Tensor(np.tile(seq_range, (config.batch_size, 1, 1)), mstype.int32)
            self.gather_past = P.Gather()
            self.expand_dims = P.ExpandDims()
            self.le_past = P.LessEqual()
    # pylint: disable=W0613
    def construct(self, tokens: Tensor, input_position=None, init_reset=True, batch_valid_length=None):
        """Forward of baichuan2_13b model."""
        # preprocess
        bs, _ = tokens.shape
        if self.use_past:
            if not isinstance(init_reset, Tensor):
                init_reset = Tensor([init_reset], mstype.bool_)
            if not isinstance(batch_valid_length, Tensor):
                batch_valid_length = self.ones((bs, 1), mstype.int32)

        if self.is_first_iteration:
            input_mask = self.cast(self.not_equal(tokens, self.pad_token_id), self.dtype)
            mask = self.get_attention_mask(input_mask)
            alibi_tensor = self.build_alibi_tensor(input_mask, self.dtype)
            # mask: [bs, seq, seq]
        else:
            cur_pos = batch_valid_length - 1
            valid_length = self.reshape(cur_pos, (-1, 1, 1))
            mask = self.cast(self.le_past(self.range, valid_length), self.dtype)
            alibi_tensor = self.build_alibi_tensor(self.all_ones_attention_mask_alibi, self.dtype)
            alibi_tensor = self.tile(alibi_tensor, (bs, 1, 1, 1))
            alibi_tensor = self.gather(alibi_tensor, cur_pos[0], 2)
            alibi_tensor = self.transpose(alibi_tensor, (2, 1, 0, 3))
            # mask: [bs, 1, 1]
        mask = self.sub(self.one, self.cast(mask, self.dtype))
        if not self.use_flash_attention:
            mask = self.expand_dims(mask, 1)
            mask = self.mul_mask(mask, self.multiply_data)

        # tokens: [bs, seq/1]
        h = self.tok_embeddings(tokens)
        # h: [bs, seq/1, hidden_dim]
        for i in range(self.num_layers):
            h, _ = self.layers[i](h, alibi_tensor, mask,
                                  init_reset=init_reset, batch_valid_length=batch_valid_length)
        output = self.norm_out(h)
        return output


class Baichuan13BDecodeLayer(nn.Cell):
    r"""
        Transformer Layer. This is an implementation of the single layer of the transformer
        encoder layer, including multihead attention and feedward layer.

        Args:
            batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
                value. When do training or prediction, the argument will not work and the user can just pass None to
                the argument.
            seq_length(int): The input sequence length.
            layer_id(int): The layer id of current transformer block layer.
            dim(int): The hidden size of the input.
            num_heads(int): The number of the heads.
            multiple_of(int): The SwiGLU hidden layer size multiple of large power of 2.
            norm_eps (float): The epsilon value of the denominator. Default 1e-5.
            compute_dtype(dtype.Number): The computation type of the layer.
                Should be mstype.float32 or mstype.float16. Default mstype.float32.
            layernorm_compute_type(dtype.Number): The computation type of the norm.
                Should be mstype.float32 or mstype.float16. Default mstype.float32.
            softmax_compute_type(dtype.Number): The computation type of the softmax in the attention.
                Should be mstype.float32 or mstype.float16. Default mstype.float32.
            param_init_type(dtype.Number): The parameter initialization type of the module.
                Should be mstype.float32 or mstype.float16. Default mstype.float32.
            use_past(bool): Use the past state to compute, used for incremental prediction. For example, if we have two
                words and want to generate the ten more words. We just need to compute the two words' state only once,
                and generate the next word one by one. When use_past is True, there are two steps to run the prediction.
                In the first step, set the is_first_iteration to be True by
                `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the
                is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`.
                At this moment, pass the single step's input tensor, and loop it. Default False.
            parallel_config(OpParallelConfig, MoEParallelConfig): The parallel configure. When MoE is applied,
                MoEParallelConfig is effective, otherwise OpParallelConfig is effective. Default `default_dpmp_config`,
                an instance of `OpParallelConfig` with default args.

        Inputs:
            - **x** (Tensor) - Float Tensor, shape should be [batch_size, seq_length, hidden_size] or
              [batch_size * seq_length, hidden_size], if the use_past is False or is_first_iteration=True. Otherwise,
              should be [batch_size, 1, hidden_size]
            - **alibi_tensor** (Tensor) - Alibi Tensor for position embedding used in attention.
            - **mask** (Tensor) - Float Tensor, If the use_past is False or is_first_iteration=True,
              the attention mask matrix should ba [batch_size, seq_length, seq_length], or None. None means there will
              be no mask in softmax computation. Otherwise, should be [batch_size, 1, hidden_size]
            - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
              past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
            - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
              Used for incremental prediction when the use_past is True. Default None.

        Outputs:
            Tuple, a tuple contains(`output`, `layer_present`).

            - **output** (Tensor) - The float tensor of the output of the layer with
              shape (batch_size, seq_length, hidden_size) or (batch_size * seq_length, hidden_size), if the use_past is
              False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size)

            - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with
              ((batch_size, num_heads, head_dim, seq_length),
              (batch_size, num_heads, seq_length, head_dim)).

    """
    def __init__(self,
                 batch_size,
                 seq_length,
                 layer_id,
                 dim: int = 512,
                 n_heads: int = 8,
                 multiple_of: int = 256,
                 n_kv_heads: Optional[int] = None,
                 ffn_dim_multiplier: Optional[int] = None,
                 norm_eps: float = 1e-5,
                 compute_dtype=mstype.float16,
                 layernorm_compute_dtype=mstype.float32,
                 softmax_compute_dtype=mstype.float32,
                 param_init_type=mstype.float32,
                 use_past=False,
                 use_flash_attention=False,
                 compute_in_2d=False,
                 use_past_shard=False,
                 parallel_config=TransformerOpParallelConfig()):
        super().__init__()
        if batch_size or use_past:
            Validator.check_positive_int(batch_size)
        self.batch_size = batch_size

        self.compute_in_2d = compute_in_2d
        self.seq_length = seq_length
        self.layer_id = layer_id
        self.hidden_size = dim
        self.n_head = n_heads
        self.head_dim = self.hidden_size // self.n_head
        self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads

        self.dtype = compute_dtype
        self.is_first_iteration = True
        self.use_past = use_past
        self.compute_in_2d = compute_in_2d
        self.key_past = None
        self.value_past = None

        self.reshape = P.Reshape()
        self.add = P.Add()
        self.attention_norm = LlamaRMSNorm(self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype)
        self.ffn_norm = LlamaRMSNorm(self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype)
        self.attention = Baichuan13BAttention(batch_size=batch_size,
                                              seq_length=seq_length,
                                              dim=dim,
                                              n_heads=n_heads,
                                              n_kv_heads=n_kv_heads,
                                              compute_dtype=compute_dtype,
                                              softmax_compute_dtype=softmax_compute_dtype,
                                              param_init_type=param_init_type,
                                              use_past=use_past,
                                              use_flash_attention=use_flash_attention,
                                              compute_in_2d=compute_in_2d,
                                              use_past_shard=use_past_shard,
                                              parallel_config=parallel_config)
        self.feed_forward = LlamaFeedForward(dim=self.hidden_size,
                                             hidden_dim=4 * self.hidden_size,
                                             multiple_of=multiple_of,
                                             ffn_dim_multiplier=ffn_dim_multiplier,
                                             compute_dtype=compute_dtype,
                                             param_init_type=param_init_type)

        dp = parallel_config.data_parallel
        mp = parallel_config.model_parallel
        self.feed_forward.shard(parallel_config)
        if self.compute_in_2d:
            self.add.shard(((dp, 1), (dp, 1)))
            self.attention_norm.shard((dp, 1))
            self.ffn_norm.shard((dp, 1))
        else:
            self.add.shard(((dp, 1, 1), (dp, 1, 1)))
            self.attention_norm.shard((dp, 1, 1))
            self.ffn_norm.shard((dp, 1, 1))
            self.feed_forward.mul.shard(((dp, 1, mp), (dp, 1, mp)))

        if parallel_config.use_seq_parallel and self.is_first_iteration:
            if self.compute_in_2d:
                self.add.shard(((dp * mp, 1), (dp * mp, 1)))
                self.attention_norm.shard((dp * mp, 1))
                self.ffn_norm.shard((dp * mp, 1))
            else:
                self.add.shard(((dp, mp, 1), (dp, mp, 1)))
                self.attention_norm.shard((dp, mp, 1))
                self.ffn_norm.shard((dp, mp, 1))
            self.feed_forward.w2.shard(((dp, mp), (1, mp)), out_strategy_matmul=((dp * mp, 1),))

        if self.use_past:
            kv_shape = (batch_size, self.n_kv_head, seq_length, self.head_dim)
            self.key_past = Parameter(Tensor(np.zeros(kv_shape), self.dtype), name="key_past")
            self.value_past = Parameter(Tensor(np.zeros(kv_shape), self.dtype), name="value_past")
            self.mul_past = P.Mul().shard(((dp, 1, 1, 1), (1,)))
            self.assign_past = P.Assign().shard(((dp, 1, 1, 1), (dp, 1, 1, 1)))
            if use_past_shard:
                self.mul_past.shard(((dp, mp, 1, 1), (1,)))
                self.assign_past.shard(((dp, mp, 1, 1), (dp, mp, 1, 1)))

    def construct(self, x, alibi_tensor, mask=None, init_reset=True, batch_valid_length=None):
        """ Forward of transformer block. """
        bs = x.shape[0]
        if self.use_past:
            if not isinstance(init_reset, Tensor):
                init_reset = Tensor([init_reset], mstype.bool_)
            if not isinstance(batch_valid_length, Tensor):
                batch_valid_length = self.ones((bs, 1), mstype.int32)
        self._check_input(x, alibi_tensor, mask, init_reset, batch_valid_length)
        # [bs, seq/1, hidden_dim] (first) [bs * seq/1, hidden_dim] (others)
        if self.compute_in_2d and x.ndim != 2:
            x = self.reshape(x, (-1, x.shape[-1]))
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        input_x = self.attention_norm(x)

        key_reset = None
        value_reset = None
        if self.use_past and self.is_first_iteration:
            # reset states, init_reset True for reuse and False for reset
            self.assign_past(self.key_past, self.mul_past(self.key_past, self.cast(init_reset, self.dtype)))
            self.assign_past(self.value_past, self.mul_past(self.value_past, self.cast(init_reset, self.dtype)))
            key_reset = self.key_past
            value_reset = self.value_past
            # add dependency for desired execution order
            input_x = ops.depend(input_x, key_reset)
            input_x = ops.depend(input_x, value_reset)
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        h, layer_present = self.attention(input_x, alibi_tensor, mask,
                                          self.key_past, self.value_past, batch_valid_length)
        h = self.add(x, h)
        ffn_norm = self.ffn_norm(h)
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        ffn_out = self.feed_forward(ffn_norm)

        value_update = None
        key_update = None
        if self.use_past:
            # current key and value
            key_present, value_present = layer_present
            # update key and value calculated this step
            self.assign_past(self.key_past, key_present)
            self.assign_past(self.value_past, value_present)
            key_update = self.key_past
            value_update = self.value_past
            # add dependency for desired execution order
            key_update = ops.depend(key_update, key_reset)
            value_update = ops.depend(value_update, value_reset)

        # add dependency for desired execution order
        ffn_out = ops.depend(ffn_out, value_update)
        ffn_out = ops.depend(ffn_out, key_update)
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        out = self.add(h, ffn_out)
        return out, layer_present

    def _check_input(self, x, alibi_tensor, mask, init_reset, batch_valid_length):
        r"""Check inputs"""
        _check_input_dtype(
            x.dtype, "x", [mstype.float32, mstype.float16], self.cls_name)
        _check_input_dtype(alibi_tensor.dtype, "alibi_tensor",
                           [mstype.float32, mstype.float16], self.cls_name)
        if mask is not None:
            _check_input_dtype(mask.dtype, "input_mask", [mstype.float32, mstype.float16], self.cls_name)

        init_reset_is_tensor = isinstance(init_reset, Tensor)
        init_reset_is_default = init_reset is True
        batch_valid_length_is_tensor = isinstance(batch_valid_length, Tensor)
        batch_is_default = batch_valid_length is None
        _check_past_none_input_none(self.use_past, "init_reset", self.cls_name, True, init_reset_is_tensor,
                                    init_reset_is_default)
        _check_past_none_input_none(self.use_past, "batch_valid_length", self.cls_name, None,
                                    batch_valid_length_is_tensor, batch_is_default)

        if self.use_past:
            _check_input_dtype(init_reset.dtype, "init_reset", [mstype.bool_], self.cls_name)
            _check_input_dtype(batch_valid_length.dtype, "batch_valid_length", [mstype.int32], self.cls_name)
        return True


class Baichuan13BAttention(nn.Cell):
    r"""
    This is an implementation of multihead attention in Baichuan.

    Args:
            - **batch_size** (int): The batch size of the input tensor when do increnmental prediction. Should be a
                positive value.
                When do training or prediction, the argument will not work and the user can just pass None to the
                argument.
            - **src_seq_length** (int): The sequence length of the query vector.
            - **tgt_seq_length** (int): The sequence length of the key and value vector.
            - **dim** (int): The hidden size of the input.
            - **head_dim** (int): The dim of head.
            - **n_heads** (int): The number of the heads.
            - **compute_dtype** (dtype.Number): The computation type of dense. Default mstype.float16.
                Should be mstype.float32 or mstype.float16.
            - **softmax_compute_type** (dtype.Number): The type of softmax computation module. Default mstype.float32.
                Should be mstype.float32 or mstype.float16.
            - **param_init_type** (dtype.Number): The parameter initialization type of the module. Default mstype.
                float32. Should be mstype.float32 or mstype.float16.
            - **use_past** (bool): Use the past state to compute, used for incremental prediction.
                For example, if we have two words and want to generate the ten more words.
                We just need to compute the two words' state only once, and generate the next word one by one.
                When use_past is True, there are two steps to run the prediction.
                In the first step, set the is_first_iteration to be True by
                `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the
                is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. At this moment,
                pass the single step's input tensor, and loop it. Default False.
            - **parallel_config** (OpParallelConfig): The parallel configure. Default `default_dpmp_config`,
                an instance of `OpParallelConfig` with default args.

    Inputs:
            - **x** (Tensor) - The input tokens with shape (batch_size, src_seq_length, hidden_size) or
                (batch_size * src_seq_length, hidden_size), if the use_past is False or is_first_iteration=True.
                Otherwise, must be (batch_size, 1, hidden_size)
            - **alibi_tensor** (Tensor) - Alibi Tensor for position embedding used in attention.
            - **mask** (Tensor) - If the use_past is False or is_first_iteration=True, the attention mask
                matrix should ba (batch_size, src_seq_length, tgt_seq_length), or None. None means there will be no mask
                in softmax computation. Otherwise, the mask must be (batch_size, 1, tgt_seq_length)
            - **key_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, head_dim, tgt_seq_length).
                The past calculated key vector. Used for incremental prediction when the use_past is True.
                Default None.
            - **value_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, tgt_seq_length,
                head_dim).
                The past calculated value vector. Used for incremental prediction when the use_past is True.
                Default None.
            - **batch_valid_length** (Tensor) - Int32 tensor with shape (batch_size,) the past calculated the index.
                Used for incremental prediction when the use_past is True. Default None.

    Outputs:
            Tuple, a tuple contains(`output`, `layer_present`)

            - **output** (Tensor) - Tensor, the float tensor of the output of the layer with
                shape (batch_size, src_seq_length, hidden_size) or (batch_size * src_seq_length, hidden_size),
                if the use_past is False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size).

            - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with
                ((batch_size, num_heads, head_dim, tgt_seq_length),
                (batch_size, num_heads, tgt_seq_length, head_dim)).
    """
    def __init__(self,
                 batch_size,
                 seq_length,
                 dim: int = 512,
                 n_heads: int = 8,
                 n_kv_heads: Optional[int] = None,
                 compute_dtype=mstype.float16,
                 softmax_compute_dtype=mstype.float32,
                 param_init_type=mstype.float32,
                 use_past=False,
                 use_flash_attention=False,
                 compute_in_2d=False,
                 use_past_shard=False,
                 parallel_config=TransformerOpParallelConfig()):
        super().__init__()
        self.seq_length = seq_length
        self.hidden_size = dim
        self.n_head = n_heads
        self.head_dim = dim // n_heads
        self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads
        self.n_rep = self.n_head // self.n_kv_head

        self.dtype = compute_dtype
        self.softmax_dtype = softmax_compute_dtype
        self.is_first_iteration = True
        self.use_past = use_past
        self.compute_in_2d = compute_in_2d
        self.use_flash_attention = use_flash_attention and FLASHATTENTION_VALID

        if self.hidden_size % self.n_head != 0:
            raise ValueError("For 'MultiHeadAttention', the class variable 'hidden_size' must be a multiple "
                             "of 'n_head', but got the hidden_size is {} and the n_head is {}."
                             .format(self.hidden_size, self.n_head))
        if self.n_kv_head % parallel_config.model_parallel != 0:
            raise ValueError("For 'MultiHeadAttention', the class variable 'n_kv_head' must be a multiple of "
                             "'parallel_config.model_parallel', but got the n_kv_head is {} "
                             "and the parallel_config.model_parallel  is {}."
                             .format(self.n_kv_head, parallel_config.model_parallel))

        self.inv_norm_factor = Tensor(1.0 / math.sqrt(self.head_dim), dtype=compute_dtype)

        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.merger_head_transpose = P.Transpose()
        self.batch_matmul = P.BatchMatMul()
        self.batch_matmul_q_k = P.BatchMatMul(transpose_b=True)
        self.mul = P.Mul()
        self.add = P.Add()
        self.add_alibi = P.Add()
        self.softmax = nn.Softmax().to_float(softmax_compute_dtype)
        self.cast = P.Cast()
        self.cast_attn = P.Cast()
        self.tile_kv = P.Tile()

        self.wo = Linear(in_channels=self.hidden_size,
                         out_channels=self.hidden_size,
                         has_bias=False,
                         compute_dtype=compute_dtype,
                         param_init_type=param_init_type)
        self.wq = Linear(self.hidden_size,
                         self.hidden_size,
                         has_bias=False,
                         compute_dtype=compute_dtype,
                         param_init_type=param_init_type)
        self.wk = Linear(self.hidden_size,
                         self.n_kv_head * self.head_dim,
                         has_bias=False,
                         compute_dtype=compute_dtype,
                         param_init_type=param_init_type)
        self.wv = Linear(self.hidden_size,
                         self.n_kv_head * self.head_dim,
                         has_bias=False,
                         compute_dtype=compute_dtype,
                         param_init_type=param_init_type)

        dp = parallel_config.data_parallel
        mp = parallel_config.model_parallel
        self.transpose.shard(((dp, 1, mp, 1),))
        self.merger_head_transpose.shard(((dp, mp, 1, 1),))
        self.batch_matmul_q_k.shard(((dp, mp, 1, 1), (dp, mp, 1, 1)))
        self.batch_matmul.shard(((dp, mp, 1, 1), (dp, mp, 1, 1)))
        self.mul.shard(((dp, mp, 1, 1), ()))
        self.add.shard(((dp, 1, 1, 1), (dp, mp, 1, 1)))
        self.add_alibi.shard(((dp, mp, 1, 1), (dp, mp, 1, 1)))
        self.softmax.softmax.shard(((dp, mp, 1, 1),))
        self.tile_kv.shard(((dp * mp, 1, 1, 1),))

        self.wq.shard(((dp, 1), (mp, 1)))
        self.wk.shard(((dp, 1), (mp, 1)))
        self.wv.shard(((dp, 1), (mp, 1)))
        self.wo.shard(((dp, mp), (1, mp)))
        if parallel_config.use_seq_parallel and self.is_first_iteration:
            self.wo.shard(((dp, mp), (1, mp)), out_strategy_matmul=((dp * mp, 1),))
        if parallel_config.recompute.select_recompute:
            self.tile_kv.recompute()
            self.batch_matmul_q_k.recompute()
            self.mul.recompute()
            self.add.recompute()
            self.cast_attn.recompute()
            self.softmax.softmax.recompute()
            self.batch_matmul.recompute()

        if self.use_flash_attention:
            self.flash_attention = FlashAttention(self.head_dim, dp=dp, mp=mp, next_block_num=0)
            self.flash_attention.shard(((dp, mp, 1, 1), (dp, mp, 1, 1), (dp, mp, 1, 1), (dp, 1, 1), ()))
            if parallel_config.recompute.select_recompute:
                self.flash_attention.recompute()

        if self.use_past:
            # operators used for state reuse
            seq_range = np.arange(seq_length).reshape(1, 1, -1)
            self.range = Tensor(np.tile(seq_range, (batch_size, 1, 1)), mstype.int32)
            self.expand_dims = P.ExpandDims().shard(((dp, 1, 1),))
            self.add_past = P.Add().shard(((dp, 1, 1, 1), (dp, 1, 1, 1)))
            self.equal = P.Equal().shard(((dp, 1, 1), (dp, 1, 1)))
            self.less = P.Less().shard(((dp, 1, 1), (dp, 1, 1)))
            self.mul_past = P.Mul().shard(((dp, 1, 1, 1), (dp, 1, 1, 1)))
            if use_past_shard:
                self.add_past.shard(((dp, mp, 1, 1), (dp, mp, 1, 1)))
                self.mul_past.shard(((dp, mp, 1, 1), (dp, 1, 1, 1)))

    def construct(self, x: Tensor, alibi_tensor: Tensor, mask=None,
                  key_past=None, value_past=None, batch_valid_length=None):
        """Forward process of the MultiHeadAttention"""
        ori_dtype = x.dtype
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        x = self.reshape(x, (-1, x.shape[-1]))
        # [bs * seq/1, hidden_dim]
        query = self.cast(self.wq(x), self.dtype)  # dp, 1 -> dp, mp
        key = self.cast(self.wk(x), self.dtype)    # dp, 1 -> dp, mp
        value = self.cast(self.wv(x), self.dtype)  # dp, 1 -> dp, mp
        query = self.reshape(query, (-1, self._get_seq_length_under_incremental(self.seq_length),
                                     self.n_head, self.head_dim))
        key = self.reshape(key, (-1, self._get_seq_length_under_incremental(self.seq_length),
                                 self.n_kv_head, self.head_dim))
        value = self.reshape(value, (-1, self._get_seq_length_under_incremental(self.seq_length),
                                     self.n_kv_head, self.head_dim))
        # [bs, seq/1, n_head/n_kv_head, head_dim]
        query = self.transpose(query, (0, 2, 1, 3))
        key = self.transpose(key, (0, 2, 1, 3))
        value = self.transpose(value, (0, 2, 1, 3))

        # kv cache: [bs, n_kv_head, 1, head_dim] -> [bs, n_kv_head, seq, head_dim]
        key_present = key
        value_present = value
        if self.use_past:
            # The first graph with the input size of (bs, seq_length)
            if self.is_first_iteration:
                # Get the valid input length without padding
                valid_length_vector = (
                    self.less(self.range, batch_valid_length.view(-1, 1, 1))).astype(self.dtype)
                # Cover the key and value numbers corresponding to the padding position
                key_present = self.mul_past(key, self.expand_dims(valid_length_vector, 3))
                value_present = self.mul_past(value, self.expand_dims(valid_length_vector, 3))
            # The second graph with the inpus size of (bs, 1)
            else:
                # Get the current token position index
                valid_length = batch_valid_length - 1
                valid_length = self.reshape(valid_length, (-1, 1, 1))
                valid_length_vector = (self.equal(self.range, valid_length)).astype(self.dtype)
                # Pad the key and value to seq_length with only the position index not zero
                current_key = self.mul_past(key, self.expand_dims(valid_length_vector, 3))
                current_value = self.mul_past(value, self.expand_dims(valid_length_vector, 3))
                # Concat the previous saved state and current state
                key = self.add_past(key_past, current_key)
                value = self.add_past(value_past, current_value)
                # Update key_present and value_present for state update
                key_present = key
                value_present = value

        layer_present = (key_present, value_present)
        # kv share: [bs, n_kv_head, seq, head_dim] -> [bs, n_head, seq, head_dim]
        key = self._repeat_kv(key, self.n_rep)
        value = self._repeat_kv(value, self.n_rep)
        # q, k, v: [bs, n_head, seq/1, head_dim], [bs, n_head, seq, head_dim], [bs, n_head, seq, head_dim]
        if self.use_flash_attention:
            attention = self.flash_attention(query, key, value, mask)
            attention = self._merge_heads(attention)
        else:
            attention = self._attn(query, key, value, alibi_tensor, mask)
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        output = self.wo(attention) # dp, mp -> dp, 1 / dp * mp, 1
        output = self.cast(output, ori_dtype)

        return output, layer_present

    def _repeat_kv(self, x, rep):
        if rep == 1:
            return x
        bs, n_kv_head, seqlen, head_dim = x.shape
        x = self.reshape(x, (bs * n_kv_head, 1, seqlen, head_dim))
        x = self.tile_kv(x, (1, rep, 1, 1))
        x = self.reshape(x, (bs, n_kv_head * rep, seqlen, head_dim))
        return x

    def _get_seq_length_under_incremental(self, length):
        r"""Return the length of the tensor.
            For the incremental prediction, the seq length for the input is 1.
        """
        if self.use_past and not self.is_first_iteration:
            return 1
        return length

    def _merge_heads(self, x):
        """
        convert a 4d input to a 2d or 3d output

        Inputs:
            x: input tensor

        Output:
            x_merge: the 2d output
        """
        # [bs, n_head, seq/1, head_dim]
        x = self.merger_head_transpose(x, (0, 2, 1, 3)) # dp,mp,1,1 -> dp,1,mp,1
        # [bs, seq/1, n_head, head_dim]
        x_shape = x.shape
        if self.compute_in_2d:
            # [bs * seq/1, hidden_dim]
            new_shape = (-1, x_shape[-2] * x_shape[-1])
        else:
            # [bs, seq/1, hidden_dim]
            new_shape = (x_shape[0], x_shape[1], -1)
        x_merge = self.reshape(x, new_shape)
        return x_merge

    def _attn(self, query, key, value, alibi_tensor, mask):
        """
        Get the weighted score along the seq_length

        Inputs:
            query: the query matrix
            key: the key matrix
            value: the value matrix
            mask: the attention mask adder matrix with shape (batch_size,
            1, seq_length, seq_length)
        Outputs:
            weighted_values: Tensor, the weighted sum scores
        """
        # q, k: [bs, n_head, seq/1, head_dim], [bs, n_head, seq, head_dim]
        score = self.batch_matmul_q_k(query, key)
        # score: [bs, n_head, seq/1, seq]
        score = self.mul(score, self.inv_norm_factor)
        score = self.add_alibi(score, alibi_tensor)

        score = self.add(mask, score)

        attention_probs = self.softmax(self.cast_attn(score, self.softmax_dtype))
        # score, v: [bs, n_head, seq/1, seq], [bs, n_head, seq, head_dim]
        weighted_values = self.batch_matmul(self.cast(attention_probs, self.dtype), value)
        # [bs, n_head, seq/1, head_dim]
        attention_merge = self._merge_heads(weighted_values)
        # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim]
        return attention_merge


class NormHead(nn.Cell):
    """
    NormHead Layer.

        Args:
            hidden_size (int): The hidden size of the input.
            vocab_size (int): Size of the dictionary of embeddings.
            compute_type (dtype.Number): The compute type.
            eps (number): A small positive value prevents division by zero.

        Inputs:
            - hidden_states (Tensor) - Tensor of shape :math:`(batch, seq_length, hidden_size)`.

        Outputs:
            Tensor of shape :math:`(batch, seq_length, vocab_size)`.
    """
    def __init__(self,
                 hidden_size,
                 vocab_size,
                 compute_dtype=mstype.float32,
                 eps=1e-5):
        super().__init__()
        self.weight = Parameter(
            initializer(HeUniform(negative_slope=math.sqrt(5)),
                        [vocab_size, hidden_size],
                        mstype.float32),
            name='weight',
            parallel_optimizer=False)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.add = P.Add()
        self.real_div = P.RealDiv()
        self.eps = Tensor([eps], mstype.float32)

        self.matmul = P.MatMul(transpose_b=True)
        self.cast = P.Cast()
        self.compute_dtype = compute_dtype
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

    def construct(self, hidden_states):
        """Forward process of the NormHead"""
        out_shape = P.Shape()(hidden_states)[:-1] + (self.vocab_size,)
        hidden_states = P.Reshape()(hidden_states, (-1, self.hidden_size))

        variance = self.square(self.weight).sum(axis=1).reshape(-1, 1)
        variance_eps = self.sqrt(self.add(variance, self.eps))
        norm_weight = self.real_div(self.weight, variance_eps)

        ori_type = hidden_states.dtype
        out = self.matmul(hidden_states.astype(self.compute_dtype),
                          norm_weight.astype(self.compute_dtype))
        out = P.Reshape()(out, out_shape)
        return self.cast(out, ori_type)

    def shard(self, parallel_config):
        """sharding for norm head"""
        if parallel_config.vocab_emb_dp:
            self.square.shard(((1, 1),))
            self.sqrt.shard(((1, 1),))
            self.add.shard(((1, 1), (1,)))
            self.real_div.shard(((1, 1), (1, 1)))
            self.matmul.shard(((parallel_config.data_parallel, 1), (1, 1)))
        else:
            self.square.shard(((parallel_config.model_parallel, 1),))
            self.sqrt.shard(((parallel_config.model_parallel, 1),))
            self.add.shard(((parallel_config.model_parallel, 1), (1,)))
            self.real_div.shard(((parallel_config.model_parallel, 1),
                                 (parallel_config.model_parallel, 1)))
            self.matmul.shard(((parallel_config.data_parallel, 1),
                               (parallel_config.model_parallel, 1)))

        if parallel_config.pipeline_stage > 1:
            self.matmul.pipeline_stage = parallel_config.pipeline_stage - 1

你可能感兴趣的:(fastapi)

SQLAdmin 开源项目教程羿辰果Gemstone
SQLAdmin开源项目教程项目地址:https://gitcode.com/gh_mirrors/sq/sqladmin项目介绍SQLAdmin是一个灵活的Admin接口，专为SQLAlchemy模型设计。它支持SQLAlchemy的同步/异步引擎，并与Starlette和FastAPI框架集成。SQLAdmin使用WTForms进行表单构建，并支持SQLModel。其用户界面基于Tabler。
探索未来技术前沿：FastAPI火箭-boilerplate，打造高性能API的超级引擎！黎杉娜Torrent
探索未来技术前沿：FastAPI火箭-boilerplate，打造高性能API的超级引擎！fastapi-rocket-boilerplateFastAPIRocketBoilerplatetobuildanAPIbasedinPythonwithitsmostmoderntechnologies!项目地址:https://gitcode.com/gh_mirrors/fa/fastapi-roc
多阶段构建实现 Docker 加速与体积减小：含文件查看、上传及拷贝功能的 FastAPI 应用镜像构建九不多 Docker docker fastapi python YOLO
本文围绕使用Docker构建FastAPI应用镜像展开，着重介绍了多阶段构建的Dockerfile编写及相关操作。借助多阶段构建，不仅实现了Docker构建的加速，还有效减小了镜像体积。1.Dockerfile内容以下是我们要使用的Dockerfile内容：#第一个阶段-构建应用FROMdocker.1ms.run/python:3.9ASbuilder#设置工作目录WORKDIR/app#复制依
基于ThinkPHP6用户登录逻辑，结合FastAPI框架实现用户登录系统的全流程解析 Wiktok python fastapi
基于ThinkPHP6用户登录逻辑，结合FastAPI框架实现用户登录系统的全流程解析，涵盖路由配置、数据验证、JWT令牌生成与鉴权、中间件依赖等核心环节：1.路由配置与请求处理路由定义：使用APIRouter组织用户认证相关接口（注册、登录），并通过app.include_router()集成到主应用。例如：#routers/auth.pyfromfastapiimportAPIRouterro
FastAPI Web框架 [1.9] carefree798 FastAPI python
表单数据fromfastapiimportFastAPI,Form#导入Form#表单数据#接收的不是JSON，而是表单字段时，要使用Formapp=FastAPI()@app.post("/login/")asyncdeflogin(username:str=Form(...),password:str=Form(...)#定义Form参数):return{"username":username
FastAPI 最佳架构项目推荐穆耀双
FastAPI最佳架构项目推荐fastapi_best_architectureFastAPIbasedontheconstructionofthefrontandbackoftheseparationofrightscontrolsystem,usingauniquepseudothree-tierarchitecturemodeldesign,andasatemplatelibraryfree
推荐开源项目：FastAPI Best Architecture — 极致的后端架构设计蓬玮剑
推荐开源项目：FastAPIBestArchitecture—极致的后端架构设计项目地址:https://gitcode.com/gh_mirrors/fa/fastapi_best_architecture项目简介在寻找一款基于FastAPI构建的强大且灵活的后端解决方案吗？那么，你已经找到了——FastAPIBestArchitecture。这是一个遵循前端与后端分离原则的中间件层解决方案，采
Python Web框架 Flask vs Django vs FastAPI ZengDerby python flask fastapi django
如果您需要构建大型的、功能丰富的应用程序，Django可能是一个很好的选择。如果您需要更灵活的框架，可以选择Flask来定制开发。而对于追求极致性能和高并发处理的项目，FastAPI可能是一个更加理想的选择。优缺点Flask在小型项目或微服务理想的选择。Flask灵活且轻量，非常适合快速开发小型应用。Flask是一个非常灵活的框架，它允许您根据项目需求进行定制。您可以根据需要选择合适的插件和扩展。
AI工具如何改变编程学习？Trae IDE与Claude 3.5的实践案例黑金IT AI智能 AI编程 fasttify 人工智能学习 ide
在现在这个到处都是电脑和手机的时代，AI工具正在变成编程学习和开发的好帮手。今天，咱们就来好好聊聊AI工具，特别是TraeIDE和Claude3.5这两个工具，在学习FastAPI和构建知识图谱的时候有多厉害，还有它们对编程行业会有什么影响。一、AI工具：编程学习与开发的好帮手AI工具在编程学习和开发里，作用可太大了。就像TraeIDE和Claude3.5，它们能像好朋友一样，在写代码的时候帮忙检
使用fastapi部署stable diffusion模型明晚十点睡代码 fastapi stable diffusion pytorch python 人工智能深度学习计算机视觉
使用vscode运行stablediffusion模型，每次加载模型都需要10+分钟，为算法及prompt调试带来了极大麻烦。使用jupyter解决自然是一个比较好的方案，但如果jupyter由于种种原因不能使用时，fastapi无疑成为了一个很好的选择。参考github链接：https://github.com/jarvislabsai/fastapi-sd-templatefromfastap
FastAPI测试策略：参数解析单元测试 qcidyu 文章归档异常传播验证依赖注入测试请求模拟技术测试覆盖率优化 Pydantic验证测试单元测试策略参数解析测试
扫描二维码关注或者微信搜一搜：编程智域前端至全栈交流与成长探索数千个预构建的AI应用，开启你的下一个伟大创意第一章：核心测试方法论1.1三层测试体系架构#第一层：模型级测试deftest_user_model_validation():withpytest.raises(ValidationError):User(age=-5)#第二层：依赖项测试deftest_auth_dependency()
Trae智能协作AI编程工具IDE：如何在MacBook Pro下载、安装和配置使用Trae？
Trae智能协作AI编程工具IDE：如何在MacBookPro下载、安装和配置使用Trae？一、为什么选择Trae智能协作IDE？在AI编程新时代，Trae通过以下突破性功能重新定义开发体验：双向智能增强：AI不仅提供代码补全，更能理解上下文主动建议架构优化方案自然语言编程：支持"用Python写一个带JWT验证的FastAPI用户系统"式开发实时协作画布：可视化呈现AI生成的代码逻辑，支持多模态
fastapi+angular外卖系统勘察加熊人 typescript fastapi angular.js 前端
说明：fastapi+angular外卖系统1.美食分类（粥，粉，面，炸鸡，炒菜，西餐，奶茶等等）2.商家列表（kfc，兰州拉面，湘菜馆，早餐店，重庆小面，潮汕砂锅粥，蜜雪冰城等等）商家item：商家店名，评分，月销量，人均价格，起送价格，配送费价格，店铺位置，商家标签，商家分类3.商家详情页商家店名，评分，月销量菜品分类（比如炒饭，拉面，盖饭，单人套餐，双人套餐，米线，酒水饮料）菜品列表菜品it
【python】Python中常见的KeyError报错分析景天科技苑 python 开发语言 python报错 KeyError
✨✨欢迎大家来到景天科技苑✨✨养成好习惯，先赞后看哦~作者简介：景天科技苑《头衔》：大厂架构师，华为云开发者社区专家博主，阿里云开发者社区专家博主，CSDN全栈领域优质创作者，掘金优秀博主，51CTO博客专家等。《博客》：Python全栈，前后端开发，小程序开发，人工智能，js逆向，App逆向，网络系统安全，数据分析，Django，fastapi，flask等框架，linux，shell脚本等实操
fastapi+angular实现菜鸟驿站系统勘察加熊人 typescript fastapi angular.js 前端
说明：我计划用fastapi+angular实现菜鸟驿站系统userid和stationid暂时先写死全部写成1也就是用户1驿站1这样就可以简化流程1.新增包裹入库增加一个添加入库的按钮然后填写信息然后入库2.新增包裹取件按钮post请求，弹窗填写取件码，取件成功需要刷新包裹状态3.获取超时列表比如有些包裹严重超时我需要查看超时包裹的信息和位置4.还需要取件记录表用弹窗每次用户取件都必须有一条取件
大模型工程师学习日记（五）：基于LangServe的AI服务架构深度解析 MMMMMMMay Love Code 学习架构语言模型深度学习人工智能 git
1.概述LangServe️帮助开发者将LangChain可运行和链部署为RESTAPI。该库集成了FastAPI并使用pydantic进行数据验证。Pydantic是一个在Python中用于数据验证和解析的第三方库，现在是Python中使用广泛的数据验证库。它利用声明式的方式定义数据模型和Python类型提示的强大功能来执行数据验证和序列化，使您的代码更可靠、更可读、更简洁且更易于调试。。它还可
fastapi+angular实现Tcp在线聊天室功能勘察加熊人 typescript fastapi angular.js tcp/ip
说明：我计划用fastapi+angular，实现一个在线聊天室的功能，1.必须有一个服务端和多个客户端2.用一个列表，显示当前所有在线的用户3.所有在线的用户，必须实现群聊和单独聊天效果图：新增安卓测试程序C:\Users\wangrusheng\AndroidStudioProjects\MyApplication9\app\src\test\java\com\example\myapplic
PyQt6内嵌http.server Web 和Flask Web服务器方法详解 mosquito_lover1 python pyqt flask http
PyQt6可以内嵌一个简单的Web服务器。虽然PyQt6本身不提供直接的Web服务器功能，但可以结合Python的标准库（如http.server）或其他Web框架（如Flask、FastAPI等）来实现。示例：使用http.server内嵌Web服务器以下是一个简单的例子，展示如何在PyQt6应用中内嵌一个基本的Web服务器：importsysfromPyQt6.QtWidgetsimportQ
【DeepSeek应用】本地部署deepseek模型后，如何在vscode中调用该模型进行代码撰写，检视和优化？ AndrewHZ 深度学习新浪潮 AI算法工程师面试指北 vscode 人工智能深度学习 DeepSeek 算法语言模型编辑器
若已成功在本地部署了DeepSeek模型（例如通过vscode-llm、ollama或私有API服务），在VSCode中调用本地模型进行代码撰写、检视和优化的完整流程如下：1.准备工作：确认本地模型服务状态模型服务类型：若使用HTTPAPI服务（如FastAPI/Flask封装），假设服务地址为http://localhost:8000。若使用ollama部署，模型名称为deepseek，调用命令
使用FastAPI部署bge-base和bge-reranker MoyiTech fastapi python 开发语言 RAG rerank
最近在做RAG项目，会频繁使用到本地embedding模型和rerank模型，但是每次跑demo都要用10来秒加载模型，非常慢，所以就封装了接口用于直接调用importosimportnumpyasnpimportloggingimportuvicornimportdatetimefromfastapiimportFastAPI,Security,HTTPExceptionfromfastapi.
FastAPI Web框架 [1.11] carefree798 FastAPI python
路径操作配置#路径操作配置#路径操作装饰器支持多种配置参数。#以下参数应直接传递给路径操作装饰器，不能传递给路径操作函数.#status_code状态码:status_code用于定义路径操作响应中的HTTP状态码#可以直接传递int代码，比如404#如果记不住数字码的涵义，也可以用status的快捷常量fromtypingimportOptional,SetfromfastapiimportFa
FastAPI完全指南：实现高效、安全的Web开发教IT的无语强 fastapi 安全前端
FastAPI完全指南：实现高效、安全的Web开发*引言：介绍FastAPI的优势和应用场景*FastAPI的主要特点应用场景为何选择FastAPIFastAPI的安装和基础配置*安装FastAPI创建基本的FastAPI应用构建你的第一个FastAPI应用*创建路由和视图响应处理*自定义响应异常处理高级响应处理*背景任务流式响应文件响应交互式API文档**SwaggerUIReDoc自定义文档使
如何快速创建Fastapi项目黄小耶@ fastapi python linux
一、环境安装安装第三方库来搭建项目依赖pipinstallfastapi#fastapi框架pipinstalltortoise-orm[accel]#数据库的ormpipinstalluvicorn#web服务器pipinstallpyjwt#权限认证pipinstallaerich#数据库迁移pipinstallaiomysql#Mysql数据库驱动二、项目结构构建大型的项目结构，文件结构如下
FastAPI安全防护指南：构建坚不可摧的参数处理体系 qcidyu fastapi 安全
扫描二维码关注或者微信搜一搜：编程智域前端至全栈交流与成长探索数千个预构建的AI应用，开启你的下一个伟大创意第一章：输入验证体系1.1类型安全革命frompydanticimportBaseModel,PaymentCardNumberfrompydantic.typesimportSecretStrclassUserRequest(BaseModel):username:str=Field(mi
fastapi和php,Sanic vs Fastapi 性能对比扫盲君 fastapi和php
Sanic，Fastapi都是优秀的pythonweb异步框架，找了半天没找到靠谱的性能对比测试，只能自己做一个。测试内容：纯get请求、异步ORM读测试工具：WRK测试代码：由于两者代码非常相似，本文就直接放上fastapi的代码demo了。1、纯get请求压测：1.1：代码代码demo1.2:结果1.2.1SanicRunning30stest@http://0.0.0.0:7006/4thr
FastAPI教程——并发async/await 雾重烟秋大语言模型实战 fastapi
本文参考FastAPI教程https://fastapi.tiangolo.com/zh/tutorial并发async/await有关路径操作函数的asyncdef语法以及异步代码、并发和并行的一些背景知识。通过asyncdef声明你的路径操作函数：@app.get('/')asyncdefread_results():results=awaitsome_library()returnresul
FastAPI 使用教程：将函数改造成可调用的 API（附完整示例） Lunar* 开发与工具使用 fastapi
引言FastAPI是一个现代化、快速（高性能）、基于标准Python类型提示构建的Web框架，非常适合开发RESTfulAPI。如果你有现成的Python函数，并希望通过HTTP请求调用它，FastAPI可以帮助你快速实现。在本文中，我们将介绍如何使用FastAPI将已有函数改造成API，并通过GET和POST请求调用它。同时，我们还会展示如何在服务运行时，自定义主机地址、端口号以及启用SSL证书
Python和FastAPI框架开发和容器化部署AWS上支持多种LLM和向量数据库的微服务API weixin_30777913 python 语言模型微服务 aws
用FastAPI创建一个输入提示词和所使用的LLM名称和向量搜索方式的API，返回LLM输出文本，其中用到OpenAIGPT4o3和AWSBedrock上的多个LLM模型的API，通过内部的类配置使用的模型和向量数据搜索类型，向量数据搜索类型包括faiss向量数据库和AWSKendra向量数据库搜索服务，这样的逻辑用设计模式中的工厂模式实现，用Python实现Docker打包项目Python代码并
Python项目在 Cursor 编辑器中 Conda 环境配置问题 phper8 python python conda cursor
在开发使用FastApi框架的Python某项目时，我遇到了一个因为Conda环境配置没有起作用的问题。我是用Cursor编辑器开发FastApi项目的，项目运行在Conda环境blog中，由于使用了opencc-python-reimplemented，Python版本要求3.10.16。于是我创建blog基于此版本：condacreate-nblogpython=3.10.16但Cursor的
Fastapi中Depends、Form、Query、Body、File区别 AI专题精讲 python python
在FastAPI中，Depends、Form、Query、Body和File用于处理不同类型的请求数据。1.Depends（依赖注入）Depends主要用于引入依赖，比如权限验证、数据库连接等，FastAPI会自动调用被依赖的函数，并将返回值传递给视图函数。示例：fromfastapiimportFastAPI,Dependsapp=FastAPI()defget_current_user():r
矩阵求逆（JAVA）初等行变换 qiuwanchi 矩阵求逆（JAVA）
package gaodai.matrix; import gaodai.determinant.DeterminantCalculation; import java.util.ArrayList; import java.util.List; import java.util.Scanner; /** * 矩阵求逆(初等行变换) * @author 邱万迟 *
JDK timer antlove java jdk schedule code timer
1.java.util.Timer.schedule(TimerTask task, long delay)：多长时间（毫秒）后执行任务 2.java.util.Timer.schedule(TimerTask task, Date time)：设定某个时间执行任务 3.java.util.Timer.schedule(TimerTask task, long delay,longperiod
JVM调优总结 -Xms -Xmx -Xmn -Xss coder_xpf jvm 应用服务器
堆大小设置JVM 中最大堆大小有三方面限制：相关操作系统的数据模型（32-bt还是64-bit）限制；系统的可用虚拟内存限制；系统的可用物理内存限制。32位系统下，一般限制在1.5G~2G；64为操作系统对内存无限制。我在Windows Server 2003 系统，3.5G物理内存，JDK5.0下测试，最大可设置为1478m。典型设置： java -Xmx
JDBC连接数据库 Array_06 jdbc
package Util; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; public class JDBCUtil { //完
Unsupported major.minor version 51.0（jdk版本错误） oloz java
java.lang.UnsupportedClassVersionError: cn/support/cache/CacheType : Unsupported major.minor version 51.0 (unable to load class cn.support.cache.CacheType) at org.apache.catalina.loader.WebappClassL
用多个线程处理1个List集合 362217990 多线程 thread list 集合
昨天发了一个提问，启动5个线程将一个List中的内容，然后将5个线程的内容拼接起来，由于时间比较急迫，自己就写了一个Demo，希望对菜鸟有参考意义。。 import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; public c
JSP简单访问数据库香水浓 sql mysql jsp
学习使用javaBean，代码很烂，仅为留个脚印 public class DBHelper { private String driverName; private String url; private String user; private String password; private Connection connection; privat
Flex4中使用组件添加柱状图、饼状图等图表 AdyZhang Flex
1.添加一个最简单的柱状图 ? 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 <?xml version= "1.0"&n
Android 5.0 - ProgressBar 进度条无法展示到按钮的前面 aijuans android
在低于SDK < 21 的版本中，ProgressBar 可以展示到按钮前面，并且为之在按钮的中间，但是切换到android 5.0后进度条ProgressBar 展示顺序变化了，按钮再前面，ProgressBar 在后面了我的xml配置文件如下： [html] view plain copy <RelativeLa
查询汇总的sql baalwolf sql
select list.listname, list.createtime,listcount from dream_list as list , (select listid,count(listid) as listcount from dream_list_user group by listid order by count(
Linux du命令和df命令区别 BigBird2012 linux
1，两者区别 du，disk usage,是通过搜索文件来计算每个文件的大小然后累加，du能看到的文件只是一些当前存在的，没有被删除的。他计算的大小就是当前他认为存在的所有文件大小的累加和。
AngularJS中的$apply，用还是不用？ bijian1013 JavaScript AngularJS $apply
在AngularJS开发中，何时应该调用$scope.$apply()，何时不应该调用。下面我们透彻地解释这个问题。但是首先，让我们把$apply转换成一种简化的形式。 scope.$apply就像一个懒惰的工人。它需要按照命
[Zookeeper学习笔记十]Zookeeper源代码分析之ClientCnxn数据序列化和反序列化 bit1129 zookeeper
ClientCnxn是Zookeeper客户端和Zookeeper服务器端进行通信和事件通知处理的主要类，它内部包含两个类，1. SendThread 2. EventThread， SendThread负责客户端和服务器端的数据通信，也包括事件信息的传输，EventThread主要在客户端回调注册的Watchers进行通知处理 ClientCnxn构造方法 &
【Java命令一】jmap bit1129 Java命令
jmap命令的用法： [hadoop@hadoop sbin]$ jmap Usage: jmap [option] <pid> (to connect to running process) jmap [option] <executable <core> (to connect to a
Apache 服务器安全防护及实战 ronin47
此文转自IBM. Apache 服务简介 Web 服务器也称为 WWW 服务器或 HTTP 服务器 (HTTP Server)，它是 Internet 上最常见也是使用最频繁的服务器之一，Web 服务器能够为用户提供网页浏览、论坛访问等等服务。由于用户在通过 Web 浏览器访问信息资源的过程中，无须再关心一些技术性的细节，而且界面非常友好，因而 Web 在 Internet 上一推出就得到
unity 3d实例化位置出现布置？ brotherlamp unity教程 unity unity资料 unity视频 unity自学
问：unity 3d实例化位置出现布置？答：实例化的同时就可以指定被实例化的物体的位置,即 position Instantiate (original : Object, position : Vector3, rotation : Quaternion) : Object 这样你不需要再用Transform.Position了, 如果你省略了第二个参数(
《重构，改善现有代码的设计》第八章 Duplicate Observed Data bylijinnan java 重构
import java.awt.Color; import java.awt.Container; import java.awt.FlowLayout; import java.awt.Label; import java.awt.TextField; import java.awt.event.FocusAdapter; import java.awt.event.FocusE
struts2更改struts.xml配置目录 chiangfai struts.xml
struts2默认是读取classes目录下的配置文件，要更改配置文件目录，比如放在WEB-INF下，路径应该写成../struts.xml(非/WEB-INF/struts.xml) web.xml文件修改如下： <filter> <filter-name>struts2</filter-name> <filter-class&g
redis做缓存时的一点优化 chenchao051 redis hadoop pipeline
最近集群上有个job，其中需要短时间内频繁访问缓存，大概7亿多次。我这边的缓存是使用redis来做的，问题就来了。首先，redis中存的是普通kv，没有考虑使用hash等解结构，那么以为着这个job需要访问7亿多次redis，导致效率低，且出现很多redi
mysql导出数据不输出标题行 daizj mysql 数据导出去掉第一行去掉标题
当想使用数据库中的某些数据，想将其导入到文件中，而想去掉第一行的标题是可以加上-N参数如通过下面命令导出数据： mysql -uuserName -ppasswd -hhost -Pport -Ddatabase -e " select * from tableName" > exportResult.txt 结果为： studentid
phpexcel导出excel表简单入门示例 dcj3sjt126com PHP Excel phpexcel
先下载PHPEXCEL类文件，放在class目录下面，然后新建一个index.php文件，内容如下 <?php error_reporting(E_ALL); ini_set('display_errors', TRUE); ini_set('display_startup_errors', TRUE); if (PHP_SAPI == 'cli') die('
爱情格言 dcj3sjt126com 格言
1) I love you not because of who you are, but because of who I am when I am with you. 　　我爱你，不是因为你是一个怎样的人，而是因为我喜欢与你在一起时的感觉。 　　2) No man or woman is worth your tears, and the one who is, won‘t
转 Activity 详解——Activity文档翻译 e200702084 android UI sqlite 配置管理网络应用
activity 展现在用户面前的经常是全屏窗口，你也可以将 activity 作为浮动窗口来使用（使用设置了 windowIsFloating 的主题），或者嵌入到其他的 activity （使用 ActivityGroup ）中。当用户离开 activity 时你可以在 onPause() 进行相应的操作。更重要的是，用户做的任何改变都应该在该点上提交 ( 经常提交到 ContentPro
win7安装MongoDB服务 geeksun mongodb
1. 下载MongoDB的windows版本：mongodb-win32-x86_64-2008plus-ssl-3.0.4.zip，Linux版本也在这里下载，下载地址： http://www.mongodb.org/downloads 2. 解压MongoDB在D:\server\mongodb, 在D:\server\mongodb下创建d
Javascript魔法方法:__defineGetter__,__defineSetter__ hongtoushizi js
转载自： http://www.blackglory.me/javascript-magic-method-definegetter-definesetter/ 在javascript的类中,可以用defineGetter和defineSetter_控制成员变量的Get和Set行为例如,在一个图书类中,我们自动为Book加上书名符号: function Book(name){
错误的日期格式可能导致走nginx proxy cache时不能进行304响应 jinnianshilongnian cache
昨天在整合某些系统的nginx配置时，出现了当使用nginx cache时无法返回304响应的情况，出问题的响应头： Content-Type:text/html; charset=gb2312 Date:Mon, 05 Jan 2015 01:58:05 GMT Expires:Mon , 05 Jan 15 02:03:00 GMT Last-Modified:Mon, 05
数据源架构模式之行数据入口 home198979 PHP 架构行数据入口
注：看不懂的请勿踩，此文章非针对java，java爱好者可直接略过。一、概念行数据入口（Row Data Gateway）：充当数据源中单条记录入口的对象，每行一个实例。二、简单实现行数据入口为了方便理解，还是先简单实现： <?php /** * 行数据入口类 */ class OrderGateway { /*定义元数
Linux各个目录的作用及内容 pda158 linux 脚本
1）根目录“/” 　　根目录位于目录结构的最顶层，用斜线（/）表示，类似于 Windows 操作系统的“C:\“，包含Fedora操作系统中所有的目录和文件。　　2）/bin 　　/bin 　　目录又称为二进制目录，包含了那些供系统管理员和普通用户使用的重要 linux命令的二进制映像。该目录存放的内容包括各种可执行文件，还有某些可执行文件的符号连接。常用的命令有：cp、d
ubuntu12.04上编译openjdk7 ol_beta HotSpot jvm jdk OpenJDK
获取源码从openjdk代码仓库获取(比较慢) 安装mercurial Mercurial是一个版本管理工具。 sudo apt-get install mercurial 将以下内容添加到$HOME/.hgrc文件中，如果没有则自己创建一个： [extensions] forest=/home/lichengwu/hgforest-crew/forest.py fe
将数据库字段转换成设计文档所需的字段 vipbooks 设计模式工作正则表达式
哈哈，出差这么久终于回来了，回家的感觉真好！ PowerDesigner的物理数据库一出来，设计文档中要改的字段就多得不计其数，如果要把PowerDesigner中的字段一个个Copy到设计文档中，那将会是一件非常痛苦的事情。