DataHub 0.8.14.1安装排错及数据导入

新建 yml 配置文件时,务必先用 YAML 格式校验工具检查一遍。另外,访问 8080 端口时返回了如下错误(HTTP 404):

{"exceptionClass":"com.linkedin.restli.server.RestLiServiceException","stackTrace":"com.linkedin.restli.server.RestLiServiceException [HTTP Status:404]\n\tat com.linkedin.restli.server.RestLiServiceException.fromThrowable(RestLiServiceException.java:315)\n\tat com.linkedin.restli.server.BaseRestLiServer.buildPreRoutingError(BaseRestLiServer.java:158)\n\tat com.linkedin.restli.server.RestRestLiServer.buildPreRoutingRestException(RestRestLiServer.java:203)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:177)\n\tat com.linkedin.restli.server.RestRestLiServer.doHandleRequest(RestRestLiServer.java:164)\n\tat com.linkedin.restli.server.RestRestLiServer.handleRequest(RestRestLiServer.java:120)\n\tat com.linkedin.restli.server.RestLiServer.handleRequest(RestLiServer.java:132)\n\tat com.linkedin.restli.server.DelegatingTransportDispatcher.handleRestRequest(DelegatingTransportDispatcher.java:70)\n\tat com.linkedin.r2.filter.transport.DispatcherRequestFilter.onRestRequest(DispatcherRequestFilter.java:70)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.transport.ServerQueryTunnelFilter.onRestRequest(ServerQueryTunnelFilter.java:58)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat 
com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.message.rest.RestFilter.onRestRequest(RestFilter.java:50)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.FilterChainImpl.onRestRequest(FilterChainImpl.java:96)\n\tat com.linkedin.r2.filter.transport.FilterChainDispatcher.handleRestRequest(FilterChainDispatcher.java:75)\n\tat com.linkedin.r2.util.finalizer.RequestFinalizerDispatcher.handleRestRequest(RequestFinalizerDispatcher.java:61)\n\tat com.linkedin.r2.transport.http.server.HttpDispatcher.handleRequest(HttpDispatcher.java:101)\n\tat com.linkedin.r2.transport.http.server.AbstractR2Servlet.service(AbstractR2Servlet.java:105)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat com.linkedin.restli.server.spring.ParallelRestliHttpRequestHandler.handleRequest(ParallelRestliHttpRequestHandler.java:63)\n\tat org.springframework.web.context.support.HttpRequestHandlerServlet.service(HttpRequestHandlerServlet.java:73)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:852)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:544)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:536)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat 
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1581)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1307)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:482)\n\tat org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1549)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1204)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:221)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:494)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:374)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:268)\n\tat org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)\n\tat org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)\n\tat org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:336)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:313)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:171)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:129)\n\tat 
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:367)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:782)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:918)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: com.linkedin.restli.server.RoutingException\n\tat com.linkedin.restli.internal.server.RestLiRouter.process(RestLiRouter.java:111)\n\tat com.linkedin.restli.server.BaseRestLiServer.getRoutingResult(BaseRestLiServer.java:139)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:173)\n\t... 62 more\n","status":404}

先不折腾了:导入文件类型的数据没有问题;导入 mysql 之类的数据源失败,起初怀疑是上面 docker 中 8080 端口的报错导致的,但不知道怎么调试,暂时收摊。
装载测试数据

python3 -m datahub docker ingest-sample-data

装载mysql数据

python3 -m datahub ingest -c /app/datahub_yml/test_mysql.yml

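执行报错。原因与上面 8080 端口的错误无关,而是 yml 格式(缩进)的问题,见附图。从下面的报错可以看出:source 被解析成了空值(none is not an allowed value),而 type、config、username 等本应位于 source 之下的字段都被当成了顶层字段(extra fields not permitted),说明这些字段没有正确缩进。一定要注意 yml 格式的解析。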

 python3 -m datahub ingest -c /app/datahub_yml/test_mysql.yml
10 validation errors for PipelineConfig
source
  none is not an allowed value (type=type_error.none.not_allowed)
allow
  extra fields not permitted (type=value_error.extra)
config
  extra fields not permitted (type=value_error.extra)
database
  extra fields not permitted (type=value_error.extra)
database_alias
  extra fields not permitted (type=value_error.extra)
host_port
  extra fields not permitted (type=value_error.extra)
password
  extra fields not permitted (type=value_error.extra)
schema_pattern
  extra fields not permitted (type=value_error.extra)
type
  extra fields not permitted (type=value_error.extra)
username
  extra fields not permitted (type=value_error.extra)

下面是官方模板,连接的是默认内置的 mysql:

---
# see https://datahubproject.io/docs/metadata-ingestion/source_docs/mysql for complete documentation
source:
  type: "mysql"
  config:
    username: datahub
    password: datahub

# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"

(附图:微信截图_20210923162800.png)

元数据的删除

查看最近的导入

python3 -m datahub ingest list-runs
No ~/.datahubenv file found, generating one for you...
+--------------------------------------+--------+---------------------+
| runId                                |   rows | created at          |
+======================================+========+=====================+
| befcb37e-1c44-11ec-997c-000c297f660f |   2948 | 2021-09-23 08:06:16 |
+--------------------------------------+--------+---------------------+
| 80d3f1c6-1c43-11ec-9382-000c297f660f |    250 | 2021-09-23 07:54:54 |
+--------------------------------------+--------+---------------------+
| no-run-id-provided                   |     19 | 2021-09-23 04:00:48 |
+--------------------------------------+--------+---------------------+

python3 -m datahub ingest rollback --run-id no-run-id-provided

python3 -m  datahub ingest rollback --run-id no-run-id-provided 
This will permanently delete data from DataHub. Do you want to continue? [y/N]: y
rolling back deletes the entities created by a run and reverts the updated aspects
this rollback deleted 0 entities and rolled back 19 aspects
showing first 19 of 19 aspects reverted by this run
+-------------------------------+------------------+---------------------+
| urn                           | aspect name      | created at          |
+===============================+==================+=====================+
| urn:li:dataPlatform:postgres  | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:presto    | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:teradata  | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:voldemort | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:snowflake | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:redshift  | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mssql     | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:bigquery  | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:druid     | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:looker    | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:feast     | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:sagemaker | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:glue      | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:redash    | dataPlatformInfo | 2021-09-23 04:00:48 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:athena    | dataPlatformInfo | 2021-09-23 04:00:48 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mongodb   | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mysql     | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:oracle    | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:pinot     | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+

目前遇到的是greenplum无法导入,不知道原因,mysql正常了。

你可能感兴趣的:(DataHub 0.8.14.1安装排错及数据导入)