Amazon Kinesis Data Firehose 是一项提取、转换、加载 (ETL) 服务,可以将串流数据以可靠方式捕获、转换和提供到数据湖、数据存储和分析服务中。
Amazon Kinesis Data Firehose 是将流数据加载到数据存储和分析工具的最简单方式。Kinesis Data Firehose 是一项完全托管式服务,让您可以轻松地从数十万个来源中捕获、转换大量流数据,并将其加载到 Amazon S3、Amazon Redshift、Amazon OpenSearch Service、Kinesis Data Analytics、通用 HTTP 终端节点,以及 Datadog、New Relic、MongoDB 和 Splunk 等的服务提供商中,从而获得近乎实时的分析与见解。
项目地址:aws-kinesis-example/http-desitination at master · JessicaWin/aws-kinesis-example · GitHub
Lambda producer & Lambda consumer: Node.js
'use strict'
const AWS = require('aws-sdk');
/**
 * Lambda producer: writes the incoming invoke event as a single record to
 * the Kinesis data stream named by the STREAM_NAME environment variable.
 *
 * @param {object} event - Lambda invoke payload; serialized as the record data.
 * @param {object} context - Lambda context (unused).
 * @returns {Promise<object>} Kinesis PutRecord response ({ShardId, SequenceNumber}).
 */
module.exports.sendDataToKinesisDataStream = async (event, context) => {
  const kinesis = new AWS.Kinesis();
  const params = {
    Data: JSON.stringify(event), // already a string; no template literal needed
    PartitionKey: 'test', // fixed key: all records land on the stream's single shard
    StreamName: process.env.STREAM_NAME,
  };
  const data = await kinesis.putRecord(params).promise();
  console.log(JSON.stringify(data));
  return data; // surface the PutRecord result instead of discarding it
};
/**
 * Lambda consumer behind the Firehose HTTP endpoint destination.
 * Decodes each base64-encoded record in the delivered batch, logs it, and
 * replies with the {requestId, timestamp} acknowledgement Firehose expects.
 *
 * @param {object} event - API Gateway proxy event; event.body holds the Firehose batch JSON.
 * @param {object} context - Lambda context (unused).
 * @returns {Promise<object>} acknowledgement echoing the batch requestId and timestamp.
 */
module.exports.consumeDataFromKinesisDataStream = async (event, context) => {
  console.log(JSON.stringify(event));
  const eventBody = JSON.parse(event.body);
  for (const record of eventBody.records) {
    // Firehose delivers each record's payload base64-encoded.
    const data = JSON.parse(Buffer.from(record.data, 'base64').toString('utf8'));
    console.log(data);
  }
  // Returning from an async handler replaces the legacy context.succeed(),
  // which is deprecated in the Node.js Lambda runtimes.
  return {
    requestId: eventBody.requestId,
    timestamp: eventBody.timestamp,
  };
};
AWS Resources
# Serverless Framework service for the Firehose HTTP-endpoint-destination example.
service: aws-kinesis-example
provider:
  name: aws
  # Region and stage are overridable from the CLI via --region / --stage.
  region: ${opt:region, 'ap-southeast-1'}
  stage: ${opt:stage, 'develop'}
  stackName: ${self:provider.stage}-${self:service}
  runtime: nodejs14.x
  memorySize: 1024
  versionFunctions: false
  iam:
    role:
      name: ${self:provider.stage}_KinesisLambdaRole
      managedPolicies:
        # NOTE(review): AdministratorAccess gives the Lambda role full account
        # access — fine for a demo, far too broad for production; scope down.
        - arn:aws:iam::aws:policy/AdministratorAccess
resources:
  Parameters:
    # Name of the Firehose delivery stream; referenced by the stream resource
    # and by its CloudWatch log-group names below.
    DeliveryStreamName:
      Type: String
      Default: ${self:provider.stage}-test-kinesis-delivery-stream
  Resources:
    # Source Kinesis data stream that the producer Lambda writes into.
    TestStream:
      Type: AWS::Kinesis::Stream
      Properties:
        Name: ${self:provider.stage}-test-kinesis-data-stream
        RetentionPeriodHours: 24
        ShardCount: 1
    # S3 bucket Firehose uses to back up records that fail HTTP delivery
    # (see S3BackupMode: FailedDataOnly below).
    S3DestinationBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: !Sub "${self:provider.stage}-test-kinesis-destination"
        CorsConfiguration:
          CorsRules:
            - AllowedHeaders: ["*"]
              AllowedMethods: [GET, PUT, HEAD, POST, DELETE]
              AllowedOrigins: ["*"]
    # Policy letting Firehose read records from the source data stream.
    KinesisDataStreamReadPolicy:
      Type: AWS::IAM::ManagedPolicy
      Properties:
        ManagedPolicyName: !Sub "${self:provider.stage}_KinesisDataStreamReadPolicy"
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - kinesis:DescribeStream
                - kinesis:PutRecord
                - kinesis:PutRecords
                - kinesis:GetShardIterator
                - kinesis:GetRecords
                - kinesis:DescribeStreamSummary
                - kinesis:RegisterStreamConsumer
              Resource:
                - !Sub "arn:aws:kinesis:${AWS::Region}:${AWS::AccountId}:stream/${self:provider.stage}-test-kinesis-data-stream"
    # Role Firehose assumes to read from the Kinesis stream (KinesisStreamSourceConfiguration).
    KinesisDataStreamReadRole:
      Type: AWS::IAM::Role
      Properties:
        RoleName: !Sub "${self:provider.stage}_KinesisDataStreamReadRole"
        AssumeRolePolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - firehose.amazonaws.com
              Action: sts:AssumeRole
        ManagedPolicyArns:
          - !Ref KinesisDataStreamReadPolicy
    # Policy granting Firehose access to the backup S3 bucket.
    FirehoseExecutionS3Policy:
      Type: AWS::IAM::ManagedPolicy
      Properties:
        ManagedPolicyName: !Sub "${self:provider.stage}_FirehoseExecutionS3Policy"
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - s3:PutObject
                - s3:GetObject
                - s3:ListBucketMultipartUploads
                - s3:AbortMultipartUpload
                # NOTE(review): documented action name is s3:PutBucketLogging
                # (IAM action matching is case-insensitive, so this still works).
                - s3:PutbucketLogging
                - s3:PutObjectVersionAcl
                - s3:PutBucketAcl
                - s3:PutBucketPolicy
                - s3:ListBucket
                - s3:GetBucketLocation
                - s3:PutObjectAcl
              Resource:
                # Both the objects (…/*) and the bucket itself — ListBucket and
                # GetBucketLocation apply at the bucket level.
                - !Sub "arn:aws:s3:::${self:provider.stage}-test-kinesis-destination/*"
                - !Sub "arn:aws:s3:::${self:provider.stage}-test-kinesis-destination"
    # Delivery role Firehose assumes when writing backups to S3.
    KinesisDataFirehoseDeliveryRole:
      Type: AWS::IAM::Role
      Properties:
        RoleName: !Sub "${self:provider.stage}_KinesisDataFirehoseDeliveryRole"
        AssumeRolePolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - firehose.amazonaws.com
              Action: sts:AssumeRole
        ManagedPolicyArns:
          - !Ref FirehoseExecutionS3Policy
    # Firehose delivery stream: reads from TestStream and POSTs batches to the
    # consumer Lambda's HTTP API endpoint.
    Deliverystream:
      Type: AWS::KinesisFirehose::DeliveryStream
      Properties:
        DeliveryStreamName: !Ref DeliveryStreamName
        DeliveryStreamType: KinesisStreamAsSource
        KinesisStreamSourceConfiguration:
          KinesisStreamARN: !GetAtt TestStream.Arn
          RoleARN: !GetAtt KinesisDataStreamReadRole.Arn
        HttpEndpointDestinationConfiguration:
          EndpointConfiguration:
            Name: api-gateway-url
            # Assembles the HTTP API invoke URL:
            # https://{apiId}.execute-api.{region}.{urlSuffix}/{stage}/kinesis/consumer
            # HttpApi is the API Gateway resource created by the Serverless
            # Framework for the httpApi event on the consumer function.
            Url:
              !Join [
                "",
                [
                  "https://",
                  !Ref HttpApi,
                  ".execute-api.",
                  !Ref AWS::Region,
                  ".",
                  !Ref AWS::URLSuffix,
                  !Sub "/${self:provider.stage}/kinesis/consumer",
                ],
              ]
          CloudWatchLoggingOptions:
            Enabled: true
            LogGroupName:
              !Join ["", [/aws/kinesisfirehose/, !Ref DeliveryStreamName]]
            LogStreamName: DestinationDelivery
          # Buffer up to 5 MB or 60 s before each HTTP delivery, whichever first.
          BufferingHints:
            IntervalInSeconds: 60
            SizeInMBs: 5
          RequestConfiguration:
            ContentEncoding: NONE
          RetryOptions:
            DurationInSeconds: 60
          RoleARN: !GetAtt KinesisDataFirehoseDeliveryRole.Arn
          # Only records that fail HTTP delivery are backed up to S3.
          S3BackupMode: FailedDataOnly
          S3Configuration:
            BucketARN: !GetAtt S3DestinationBucket.Arn
            RoleARN: !GetAtt KinesisDataFirehoseDeliveryRole.Arn
            CloudWatchLoggingOptions:
              Enabled: true
              LogGroupName:
                !Join ["", [/aws/kinesisfirehose/, !Ref DeliveryStreamName]]
              LogStreamName: BackupDelivery
            ErrorOutputPrefix: error/
functions:
  # Producer: writes its invoke event into the Kinesis data stream.
  KinesisDataStreamProducer:
    handler: handler.sendDataToKinesisDataStream
    name: ${self:provider.stage}-${self:service}-data-producer
    environment:
      STREAM_NAME: !Ref TestStream
  # Consumer: HTTP API endpoint that the Firehose HTTP destination POSTs to
  # (its URL is assembled in the delivery-stream resource).
  KinesisDataStreamConsumer:
    handler: handler.consumeDataFromKinesisDataStream
    name: ${self:provider.stage}-${self:service}-data-consumer
    events:
      - httpApi:
          path: /${self:provider.stage}/kinesis/consumer
          method: POST
项目地址:
https://github.com/JessicaWin/aws-kinesis-example/tree/master/s3-destination
Lambda producer & Lambda consumer: Node.js
'use strict'
const AWS = require('aws-sdk');
/**
 * Lambda producer: writes the incoming invoke event as a single record to
 * the Kinesis data stream named by the STREAM_NAME environment variable.
 *
 * @param {object} event - Lambda invoke payload; serialized as the record data.
 * @param {object} context - Lambda context (unused).
 * @returns {Promise<object>} Kinesis PutRecord response ({ShardId, SequenceNumber}).
 */
module.exports.sendDataToKinesisDataStream = async (event, context) => {
  const kinesis = new AWS.Kinesis();
  const params = {
    Data: JSON.stringify(event), // already a string; no template literal needed
    PartitionKey: 'test', // fixed key: all records land on the stream's single shard
    StreamName: process.env.STREAM_NAME,
  };
  const data = await kinesis.putRecord(params).promise();
  console.log(JSON.stringify(data));
  return data; // surface the PutRecord result instead of discarding it
};
AWS Resources
# Serverless Framework service for the Firehose S3-destination example.
service: aws-kinesis-example
provider:
  name: aws
  # Region and stage are overridable from the CLI via --region / --stage.
  region: ${opt:region, 'ap-southeast-1'}
  stage: ${opt:stage, 'develop'}
  stackName: ${self:provider.stage}-${self:service}
  runtime: nodejs14.x
  memorySize: 1024
  versionFunctions: false
  iam:
    role:
      name: ${self:provider.stage}_KinesisLambdaRole
      managedPolicies:
        # NOTE(review): AdministratorAccess gives the Lambda role full account
        # access — fine for a demo, far too broad for production; scope down.
        - arn:aws:iam::aws:policy/AdministratorAccess
resources:
  Parameters:
    # Name of the Firehose delivery stream for the S3-destination variant.
    DeliveryStreamName:
      Type: String
      Default: ${self:provider.stage}-test-kinesis-delivery-stream-s3
  Resources:
    # Source Kinesis data stream that the producer Lambda writes into.
    # NOTE(review): same physical name as the HTTP-destination example — the
    # two stacks cannot be deployed to the same account/region/stage together.
    TestStream:
      Type: AWS::Kinesis::Stream
      Properties:
        Name: ${self:provider.stage}-test-kinesis-data-stream
        RetentionPeriodHours: 24
        ShardCount: 1
    # Destination S3 bucket Firehose delivers records into.
    # NOTE(review): bucket name also collides with the HTTP-destination example.
    S3DestinationBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: !Sub "${self:provider.stage}-test-kinesis-destination"
        CorsConfiguration:
          CorsRules:
            - AllowedHeaders: ["*"]
              AllowedMethods: [GET, PUT, HEAD, POST, DELETE]
              AllowedOrigins: ["*"]
    # Policy letting Firehose read records from the source data stream.
    KinesisDataStreamReadPolicy:
      Type: AWS::IAM::ManagedPolicy
      Properties:
        ManagedPolicyName: !Sub "${self:provider.stage}_KinesisDataStreamReadPolicy"
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - kinesis:DescribeStream
                - kinesis:PutRecord
                - kinesis:PutRecords
                - kinesis:GetShardIterator
                - kinesis:GetRecords
                - kinesis:DescribeStreamSummary
                - kinesis:RegisterStreamConsumer
              Resource:
                - !Sub "arn:aws:kinesis:${AWS::Region}:${AWS::AccountId}:stream/${self:provider.stage}-test-kinesis-data-stream"
    # Role Firehose assumes to read from the Kinesis stream.
    KinesisDataStreamReadRole:
      Type: AWS::IAM::Role
      Properties:
        RoleName: !Sub "${self:provider.stage}_KinesisDataStreamReadRole"
        AssumeRolePolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - firehose.amazonaws.com
              Action: sts:AssumeRole
        ManagedPolicyArns:
          - !Ref KinesisDataStreamReadPolicy
    # NOTE(review): "Firehorse" is a misspelling of "Firehose" carried through
    # these logical IDs/names; kept as-is since renaming changes the resources.
    # NOTE(review): lambda:* on * is not needed for an S3 destination (no
    # Lambda transform is configured) — consider removing this policy.
    KinesisDataFirehorseDeliveryPolicy:
      Type: AWS::IAM::ManagedPolicy
      Properties:
        ManagedPolicyName: !Sub "${self:provider.stage}_KinesisDataFirehorseDeliveryPolicy"
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - "lambda:*"
              Resource:
                - "*"
    # Policy granting Firehose access to the destination S3 bucket.
    FirehoseExecutionS3Policy:
      Type: AWS::IAM::ManagedPolicy
      Properties:
        ManagedPolicyName: !Sub "${self:provider.stage}_FirehoseExecutionS3Policy"
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - s3:PutObject
                - s3:GetObject
                - s3:ListBucketMultipartUploads
                - s3:AbortMultipartUpload
                # NOTE(review): documented action name is s3:PutBucketLogging
                # (IAM action matching is case-insensitive, so this still works).
                - s3:PutbucketLogging
                - s3:PutObjectVersionAcl
                - s3:PutBucketAcl
                - s3:PutBucketPolicy
                - s3:ListBucket
                - s3:GetBucketLocation
                - s3:PutObjectAcl
              Resource:
                # Both the objects (…/*) and the bucket itself — ListBucket and
                # GetBucketLocation apply at the bucket level.
                - !Sub "arn:aws:s3:::${self:provider.stage}-test-kinesis-destination/*"
                - !Sub "arn:aws:s3:::${self:provider.stage}-test-kinesis-destination"
    # Delivery role Firehose assumes when writing to S3.
    KinesisDataFirehorseDeliveryRole:
      Type: AWS::IAM::Role
      Properties:
        RoleName: !Sub "${self:provider.stage}_KinesisDataFirehorseDeliveryRole"
        AssumeRolePolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - firehose.amazonaws.com
              Action: sts:AssumeRole
        ManagedPolicyArns:
          - !Ref KinesisDataFirehorseDeliveryPolicy
          - !Ref FirehoseExecutionS3Policy
    # Firehose delivery stream: reads from TestStream and writes batches to S3
    # under success/ (delivered) and error/ (failed) prefixes.
    Deliverystream:
      Type: AWS::KinesisFirehose::DeliveryStream
      Properties:
        DeliveryStreamName: !Ref DeliveryStreamName
        DeliveryStreamType: KinesisStreamAsSource
        KinesisStreamSourceConfiguration:
          KinesisStreamARN: !GetAtt TestStream.Arn
          RoleARN: !GetAtt KinesisDataStreamReadRole.Arn
        S3DestinationConfiguration:
          BucketARN: !GetAtt S3DestinationBucket.Arn
          RoleARN: !GetAtt KinesisDataFirehorseDeliveryRole.Arn
          # Buffer up to 5 MB or 60 s before each S3 write, whichever first.
          BufferingHints:
            IntervalInSeconds: 60
            SizeInMBs: 5
          ErrorOutputPrefix: error/
          Prefix: success/
          CloudWatchLoggingOptions:
            Enabled: true
            LogGroupName:
              !Join ["", [/aws/kinesisfirehose/, !Ref DeliveryStreamName]]
            LogStreamName: DestinationDelivery
functions:
  # Producer: writes its invoke event into the Kinesis data stream; no
  # consumer Lambda here — Firehose delivers directly to S3.
  KinesisDataStreamProducer:
    handler: handler.sendDataToKinesisDataStream
    name: ${self:provider.stage}-${self:service}-data-producer
    environment:
      STREAM_NAME: !Ref TestStream