20190708工作进展

  1. create table hs_uc_bhtb_xhs_topic_t_url_info_ as select row_number()over() as video_id, ori_url, oss_url from zj_uc_bhtb_xhs_topic_t_url_info_ where ds=20190705;

  2. create table hs_video_image_out(videoID bigint, frameID bigint, img string, shot_id bigint) partitioned by (ds string) lifecycle 14;

  3. create table hs_video_extra_info(videoID bigint, extra_info string) partitioned by (ds string) lifecycle 14;

  4. fs -mkv hs_xhs_audio_volumn;

pai -name imgtoolkit_video_test -project algo_public_dev -DinputTableName="graph_embedding.hs_uc_bhtb_xhs_topic_t_url_info_" -DoutputTableName="graph_embedding.hs_video_image_out" -DoutputExtraTableName="graph_embedding.hs_video_extra_info" -Dvolumn="graph_embedding/volumes/hs_xhs_audio_volumn" -DvolumnPartition="hs_audio_0" -DinFeatureSchema="video_id:item,oss_url:url" -Dfunction=8 -DisDebug=true -DhasAudio=False -Dwidth=480 -Dheight=360 -DgpuCount=1 -DisShot=true -DworkerCount=100 -DisOutVolume1=true -DkeyFreq=3 -DisLog=true -DtimeOut="100000000";

pai -name pytorch -project algo_public_dev -Dpython=3.6 -Dscript="file:///apsarapangu/disk1/hengsong.lhs/origin_deep_cluster_odps_5.tar.gz" -DentryFile="clusterUsingSameCenter.py" -Dtables="odps://graph_embedding/tables/hs_jingyan_query_related_video_pool_2_3,odps://graph_embedding/tables/hs_jingyan_query_related_top_query_1" -Doutputs="odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_0,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_1,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_2,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_3,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_4,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_5,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_6,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_7,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title3_8" -Dbucket="oss://bucket-automl/" -Darn="acs:ram::1293303983251548:role/graph2018" -Dhost="cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="" -DworkerCount=10;

  1. 尝试使用固定中心点的方式进行聚类,效果不好:网络难以收敛,最终结果差,训练结束时 loss 函数的值是之前的几百倍。47_soft 的结果在:hs_jingyan_query_cluster_result_title3;

  2. 100_hard 固定中心点

https://logview.alibaba-inc.com/logview/?h=http://service-corp.odps.aliyun-inc.com/api&p=graph_embedding&i=20190708093823787gg1kw8y_d5fa297f_5e5b_428b_ad95_5eb855544249&token=cjN5aEJoRXlMbU1taXUxd3NuakJDMGlDNFgwPSxPRFBTX09CTzoxMjkzMzAzOTgzMjUxNTQ4LDE1NjMxODM1MDkseyJTdGF0ZW1lbnQiOlt7IkFjdGlvbiI6WyJvZHBzOlJlYWQiXSwiRWZmZWN0IjoiQWxsb3ciLCJSZXNvdXJjZSI6WyJhY3M6b2RwczoqOnByb2plY3RzL2dyYXBoX2VtYmVkZGluZy9pbnN0YW5jZXMvMjAxOTA3MDgwOTM4MjM3ODdnZzFrdzh5X2Q1ZmEyOTdmXzVlNWJfNDI4Yl9hZDk1XzVlYjg1NTU0NDI0OSJdfV0sIlZlcnNpb24iOiIxIn0=

结果会在:hs_jingyan_query_cluster_result_title2

  3. 100_hard 不固定中心点

pai -name pytorch -project algo_public_dev -Dpython=3.6 -Dscript="file:///apsarapangu/disk1/hengsong.lhs/origin_deep_cluster_odps_5.tar.gz" -DentryFile="clusterUsingPrecenter.py" -Dtables="odps://graph_embedding/tables/hs_jingyan_query_related_video_pool_2_3,odps://graph_embedding/tables/hs_jingyan_query_related_top_query_1" -Doutputs="odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4_0,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4_1,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4_2,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4_3,odps://graph_embedding/tables/hs_jingyan_query_cluster_result_title4_4" -Dbucket="oss://bucket-automl/" -Darn="acs:ram::1293303983251548:role/graph2018" -Dhost="cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="" -DworkerCount=10;

http://logview.odps.aliyun-inc.com:8080/logview/?h=http://service-corp.odps.aliyun-inc.com/api&p=graph_embedding&i=20190708124900130g9rlw8y_730f9c35_ad10_4c6e_9a03_45bccd488d10&token=Mm03K3ZYTkNuY2hjU0Nnc3dwWElvZ2FEZUQ4PSxPRFBTX09CTzoxMjkzMzAzOTgzMjUxNTQ4LDE1NjMxOTQ5NDUseyJTdGF0ZW1lbnQiOlt7IkFjdGlvbiI6WyJvZHBzOlJlYWQiXSwiRWZmZWN0IjoiQWxsb3ciLCJSZXNvdXJjZSI6WyJhY3M6b2RwczoqOnByb2plY3RzL2dyYXBoX2VtYmVkZGluZy9pbnN0YW5jZXMvMjAxOTA3MDgxMjQ5MDAxMzBnOXJsdzh5XzczMGY5YzM1X2FkMTBfNGM2ZV85YTAzXzQ1YmNjZDQ4OGQxMCJdfV0sIlZlcnNpb24iOiIxIn0=

  1. 构建video_emb表

create table hs_xhs_video_emb as select *, yuyan_udf_resnet50_fullcate_fc(search_offline:imgto1d_yuyan_python(ss_transform_base64(img))) as video_feature from hs_video_image_out;

http://logview.odps.aliyun-inc.com:8080/logview/?h=http://service-corp.odps.aliyun-inc.com/api&p=graph_embedding&i=20190708115757790gdmfvhfj2&token=b2RpdnlKVC9nQlRWTit0cmJtbzB5emNiYk5ZPSxPRFBTX09CTzoxMjkzMzAzOTgzMjUxNTQ4LDE1NjMxOTE4NzgseyJTdGF0ZW1lbnQiOlt7IkFjdGlvbiI6WyJvZHBzOlJlYWQiXSwiRWZmZWN0IjoiQWxsb3ciLCJSZXNvdXJjZSI6WyJhY3M6b2RwczoqOnByb2plY3RzL2dyYXBoX2VtYmVkZGluZy9pbnN0YW5jZXMvMjAxOTA3MDgxMTU3NTc3OTBnZG1mdmhmajIiXX1dLCJWZXJzaW9uIjoiMSJ9

你可能感兴趣的:(20190708工作进展)