FastText中文词向量的使用

fastText中文

词向量下载地址

调用方法

官方文档

from gensim.models.keyedvectors import FastTextKeyedVectors
wv = FastTextKeyedVectors.load("data/fasttext/cc.zh.ftv")
wv.get_vector("齐次方程的通解")
Out[4]: 
array([-0.02770528, -0.01940057, -0.00155423, -0.00623327, -0.03702749,
       -0.00690734,  0.00132118, -0.03920605,  0.0003689 ,  0.02432604,
       -0.00343145, -0.00634525,  0.01728246, -0.01439409,  0.00236067,
       -0.00289405,  0.02843424, -0.00097948,  0.00583924,  0.01231529,
       -0.03100128,  0.01156239, -0.00680694,  0.00094301, -0.01547958,
        0.0077299 ,  0.00751489,  0.01592178, -0.03957854,  0.00964979,
        0.0044807 , -0.01453753,  0.00710117,  0.01314075,  0.01425264,
        0.03803091, -0.0091995 , -0.0136196 ,  0.0045022 , -0.022723  ,
       -0.00032682, -0.00499234, -0.01086006,  0.0011895 , -0.03108007,
       -0.00415922, -0.01814952, -0.00455354,  0.00497562,  0.01084334,
        0.01273701,  0.00389506,  0.00847115,  0.00440748, -0.01313938,
        0.01552561,  0.00277355, -0.01287873,  0.00466544, -0.0073945 ,
        0.01031001, -0.00748088, -0.00804983,  0.02573756,  0.00877401,
        0.01190013, -0.00954799, -0.02087725,  0.02140302, -0.02945632,
        0.00132381, -0.02526976,  0.0159622 ,  0.0016231 , -0.00555376,
        0.02165438, -0.01303895,  0.02886597,  0.01332908,  0.02119413,
        0.0217433 ,  0.01670258,  0.02963465,  0.00951478, -0.02103903,
        0.02111733, -0.00011307,  0.02238687,  0.01577717, -0.02792673,
        0.01054026,  0.00523125,  0.00144492, -0.01466708, -0.02596621,
       -0.00096878,  0.00942538, -0.01048814, -0.02157109,  0.03101451,
        0.00557971,  0.00886945,  0.00436569, -0.01355057, -0.00414133,
       -0.0238845 ,  0.00524544, -0.0121206 , -0.00595451, -0.00850328,
       -0.01070436,  0.03202352, -0.02924015, -0.00939905, -0.00973766,
        0.03194467,  0.01241933,  0.01057136,  0.0061421 , -0.01014839,
        0.01046439,  0.00259275,  0.02567853,  0.00913086,  0.00491641,
       -0.00776988, -0.00802085,  0.00465507,  0.02730826,  0.02127333,
       -0.02182401,  0.01569831,  0.01977419, -0.00087911, -0.01346703,
        0.00718763, -0.00223741, -0.00895895,  0.01081999,  0.02379143,
        0.0131931 , -0.00668924, -0.00956822,  0.01767019, -0.06572688,
        0.00525941,  0.03237703, -0.00212714,  0.00381643,  0.02148647,
        0.03570746, -0.01569255,  0.00930035,  0.02522128,  0.00313984,
       -0.00214189,  0.02906055, -0.02909016,  0.02421694, -0.02852829,
       -0.00023564,  0.03686526, -0.01195082,  0.0030988 ,  0.00752129,
       -0.01934787, -0.00400132,  0.01888735, -0.01827173,  0.04846204,
        0.03962749,  0.00959804, -0.02653589,  0.01485417, -0.01556092,
       -0.01371702,  0.00330527,  0.01246037, -0.0034099 , -0.0137812 ,
        0.00327434,  0.00292469, -0.00094473,  0.0043035 , -0.01822643,
       -0.01303349,  0.02134749,  0.02018705, -0.01369897,  0.05368842,
        0.0196601 , -0.01711259, -0.00048394, -0.00561686, -0.01467973,
        0.01123364, -0.03325021, -0.01721912,  0.04285329, -0.00715051,
       -0.00485801,  0.01880258, -0.01212107,  0.01101887,  0.01492605,
        0.01348591, -0.00113208, -0.00581867, -0.00868604,  0.00326946,
       -0.01378596,  0.01499698, -0.00037066, -0.01323319,  0.01549012,
        0.00025271, -0.01112719, -0.01482856,  0.01116403,  0.0012931 ,
       -0.01440499,  0.00407554, -0.02058306, -0.02844767, -0.00558925,
       -0.00789439, -0.01337096, -0.00483002,  0.01600403,  0.01178123,
       -0.00214079,  0.04410514,  0.00527514, -0.00495032,  0.00473957,
       -0.02659005,  0.04083819,  0.02417161,  0.00695672, -0.01637241,
        0.03720611, -0.01910962,  0.00214158,  0.00553989,  0.00398967,
       -0.01670013,  0.00065715, -0.00940218, -0.01750198,  0.0381637 ,
       -0.00782963,  0.02342749, -0.01228353, -0.0020781 , -0.00244336,
       -0.00570503,  0.01906461,  0.02160338,  0.00882312,  0.00203404,
       -0.00310014, -0.01708252, -0.01386752,  0.00605795,  0.004504  ,
        0.00690902,  0.01037846, -0.00808828, -0.00048305, -0.01928573,
        0.03381735,  0.0037837 ,  0.00265101,  0.02909622,  0.00188754,
       -0.01172526, -0.02343269, -0.03091168,  0.01300145, -0.02605992,
       -0.04595874, -0.01210633,  0.00022056,  0.00138075, -0.00716891,
        0.00127432,  0.0145096 , -0.00135081, -0.01810584, -0.04203109,
        0.03009902, -0.01527129, -0.00190694, -0.02493262,  0.06431199,
       -0.00668367, -0.0072659 , -0.02381454, -0.04280134, -0.02137613],
      dtype=float32)
wv.vocab
Out[6]: 
{',': <gensim.models.keyedvectors.Vocab at 0x17bf7c7e0f0>,
 '的': <gensim.models.keyedvectors.Vocab at 0x17bf7e3dba8>,
 '。': <gensim.models.keyedvectors.Vocab at 0x17beb3ad940>,
 '</s>': <gensim.models.keyedvectors.Vocab at 0x17beb3ad080>,
 '、': <gensim.models.keyedvectors.Vocab at 0x17bf7eb2f98>,
 '是': <gensim.models.keyedvectors.Vocab at 0x17bf7eb2fd0>,
 '一': <gensim.models.keyedvectors.Vocab at 0x17bf7ec1048>,
 '在': <gensim.models.keyedvectors.Vocab at 0x17bf7ec1080>,
 ':': <gensim.models.keyedvectors.Vocab at 0x17bf7ec10b8>,
 '了': <gensim.models.keyedvectors.Vocab at 0x17bf7ec10f0>,
 '(': <gensim.models.keyedvectors.Vocab at 0x17bf7ec1128>,
 ')': <gensim.models.keyedvectors.Vocab at 0x17bf7ec1160>,
 "'": <gensim.models.keyedvectors.Vocab at 0x17bf7ec1198>,
 '和': <gensim.models.keyedvectors.Vocab at 0x17bf7ec11d0>,
 '不': <gensim.models.keyedvectors.Vocab at 0x17bf7ec1208>
 ...}
wv.vector_size
Out[7]: 300
wv.similarity("3","120")
Out[8]: 0.5710426

你可能感兴趣的:(自然语言处理)