新建 yml 文件时务必先做 YAML 格式校验。另外,访问 8080 端口时返回了如下错误(HTTP 404,由 RoutingException 引起,看起来是请求路径没有匹配到任何 Rest.li 资源):
{"exceptionClass":"com.linkedin.restli.server.RestLiServiceException","stackTrace":"com.linkedin.restli.server.RestLiServiceException [HTTP Status:404]\n\tat com.linkedin.restli.server.RestLiServiceException.fromThrowable(RestLiServiceException.java:315)\n\tat com.linkedin.restli.server.BaseRestLiServer.buildPreRoutingError(BaseRestLiServer.java:158)\n\tat com.linkedin.restli.server.RestRestLiServer.buildPreRoutingRestException(RestRestLiServer.java:203)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:177)\n\tat com.linkedin.restli.server.RestRestLiServer.doHandleRequest(RestRestLiServer.java:164)\n\tat com.linkedin.restli.server.RestRestLiServer.handleRequest(RestRestLiServer.java:120)\n\tat com.linkedin.restli.server.RestLiServer.handleRequest(RestLiServer.java:132)\n\tat com.linkedin.restli.server.DelegatingTransportDispatcher.handleRestRequest(DelegatingTransportDispatcher.java:70)\n\tat com.linkedin.r2.filter.transport.DispatcherRequestFilter.onRestRequest(DispatcherRequestFilter.java:70)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.transport.ServerQueryTunnelFilter.onRestRequest(ServerQueryTunnelFilter.java:58)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat 
com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.message.rest.RestFilter.onRestRequest(RestFilter.java:50)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.FilterChainImpl.onRestRequest(FilterChainImpl.java:96)\n\tat com.linkedin.r2.filter.transport.FilterChainDispatcher.handleRestRequest(FilterChainDispatcher.java:75)\n\tat com.linkedin.r2.util.finalizer.RequestFinalizerDispatcher.handleRestRequest(RequestFinalizerDispatcher.java:61)\n\tat com.linkedin.r2.transport.http.server.HttpDispatcher.handleRequest(HttpDispatcher.java:101)\n\tat com.linkedin.r2.transport.http.server.AbstractR2Servlet.service(AbstractR2Servlet.java:105)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat com.linkedin.restli.server.spring.ParallelRestliHttpRequestHandler.handleRequest(ParallelRestliHttpRequestHandler.java:63)\n\tat org.springframework.web.context.support.HttpRequestHandlerServlet.service(HttpRequestHandlerServlet.java:73)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:852)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:544)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:536)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat 
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1581)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1307)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:482)\n\tat org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1549)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1204)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:221)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:494)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:374)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:268)\n\tat org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)\n\tat org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)\n\tat org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:336)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:313)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:171)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:129)\n\tat 
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:367)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:782)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:918)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: com.linkedin.restli.server.RoutingException\n\tat com.linkedin.restli.internal.server.RestLiRouter.process(RestLiRouter.java:111)\n\tat com.linkedin.restli.server.BaseRestLiServer.getRoutingResult(BaseRestLiServer.java:139)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:173)\n\t... 62 more\n","status":404}
不折腾了:导入文件类型的数据源没有问题;导入 mysql 之类的数据源失败,怀疑是上面那个 docker 服务有问题,但暂时不知道怎么调试。先收工。
装载测试数据
python3 -m datahub docker ingest-sample-data
装载mysql数据
python3 -m datahub ingest -c /app/datahub_yml/test_mysql.yml
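报错,原因与上面的 8080 错误无关,而是 yml 格式(缩进)的问题!从下面的报错可以看出:source 下面的 type、config、username 等键没有正确缩进,全部被解析成了顶层字段,所以 source 本身为空(none is not an allowed value),其余键都被报为 extra fields not permitted。见附图,一定要注意 yml 的缩进和格式解析。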
python3 -m datahub ingest -c /app/datahub_yml/test_mysql.yml
10 validation errors for PipelineConfig
source
none is not an allowed value (type=type_error.none.not_allowed)
allow
extra fields not permitted (type=value_error.extra)
config
extra fields not permitted (type=value_error.extra)
database
extra fields not permitted (type=value_error.extra)
database_alias
extra fields not permitted (type=value_error.extra)
host_port
extra fields not permitted (type=value_error.extra)
password
extra fields not permitted (type=value_error.extra)
schema_pattern
extra fields not permitted (type=value_error.extra)
type
extra fields not permitted (type=value_error.extra)
username
extra fields not permitted (type=value_error.extra)
官方模板,这个是默认的内置的mysql
---
# see https://datahubproject.io/docs/metadata-ingestion/source_docs/mysql for complete documentation
source:
  type: "mysql"
  config:
    username: datahub
    password: datahub
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"
元数据的删除
查看最近的导入
python3 -m datahub ingest list-runs
No ~/.datahubenv file found, generating one for you...
+--------------------------------------+--------+---------------------+
| runId | rows | created at |
+======================================+========+=====================+
| befcb37e-1c44-11ec-997c-000c297f660f | 2948 | 2021-09-23 08:06:16 |
+--------------------------------------+--------+---------------------+
| 80d3f1c6-1c43-11ec-9382-000c297f660f | 250 | 2021-09-23 07:54:54 |
+--------------------------------------+--------+---------------------+
| no-run-id-provided | 19 | 2021-09-23 04:00:48 |
+--------------------------------------+--------+---------------------+
python3 -m datahub ingest rollback --run-id no-run-id-provided
python3 -m datahub ingest rollback --run-id no-run-id-provided
This will permanently delete data from DataHub. Do you want to continue? [y/N]: y
rolling back deletes the entities created by a run and reverts the updated aspects
this rollback deleted 0 entities and rolled back 19 aspects
showing first 19 of 19 aspects reverted by this run
+-------------------------------+------------------+---------------------+
| urn | aspect name | created at |
+===============================+==================+=====================+
| urn:li:dataPlatform:postgres | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:presto | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:teradata | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:voldemort | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:snowflake | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:redshift | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mssql | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:bigquery | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:druid | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:looker | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:feast | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:sagemaker | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:glue | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:redash | dataPlatformInfo | 2021-09-23 04:00:48 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:athena | dataPlatformInfo | 2021-09-23 04:00:48 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mongodb | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:mysql | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:oracle | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
| urn:li:dataPlatform:pinot | dataPlatformInfo | 2021-09-23 04:00:47 |
+-------------------------------+------------------+---------------------+
目前遇到的问题是 greenplum 无法导入,原因尚不清楚;mysql 已经可以正常导入了。