词频统计

ROSTCM6

 

 1. http://www.writewords.org.uk/word_count.asp
2. http://darylkinsman.ca/tools/wordfreq.shtml
3. http://www.wordcounter.com/

VBA macro of    word

Sub ChineseCharCounting()
        '统计汉字的字词频,并按降序排序
        '中文词语的判断与Word的词典关联
        Dim a As Byte
        Dim n As Long
        Dim TF As Boolean
        Dim filetext As String
        Dim d
        Dim Wd As Range
        Dim W As Range
        Dim b
        Dim e As Long
        Dim c() As String
        Dim i As Long
        Dim temp As String
        Dim st As Single
       
        a = MsgBox("词频统计请按“是”,字频统计请按“否”", vbYesNo, "中文字词频统计")
        st = Timer
        Application.ScreenUpdating = False
        n = ActiveDocument.Content.ComputeStatistics(wdStatisticFarEastCharacters)
        If ActiveDocument.Content.Text Like "*[【】〖〗《》〈〉〔〕]*" Then TF = True
        With ActiveDocument.Content.Find
            .Text = "[【】〖〗《》〈〉〔〕]"
            .MatchWildcards = True
            .Execute Replace:=wdReplaceAll
        End With
        Set d = CreateObject("Scripting.Dictionary")
        If a = vbYes Then
            For Each Wd In ActiveDocument.Words
                With Wd
                    If .Start < e Then .Start = e
                    e = .End
                    If .Text Like "*[一-龥]*" And Len(.Text) > 1 Then
                        If .Text Like "*[!一-龥]*" = False And .Words.Count = 1 Then
                            d(.Text) = d(.Text) + 1
                        Else
                            For i = 1 To Len(.Text)
                                If Mid(.Text, i, 1) Like "[!一-龥]" Then Exit For
                            Next
                            With .Duplicate
                                .End = .Start + i - 1
                                For Each W In .Words
                                    With W
                                        If Len(.Text) > 1 Then
                                            If Right(.Text, 1) Like "[!一-龥]" Then .End = .End - 1
                                            If .Text Like "*[!一-龥]*" = False Then d(.Text) = d(.Text) + 1
                                        End If
                                    End With
                                Next
                            End With
                        End If
                    End If
                End With
            Next
        Else
            filetext = ActiveDocument.Content.Text
            For i = 1 To Len(filetext)
                temp = Mid(filetext, i, 1)
                If temp Like "[一-龥]" Then d(temp) = d(temp) + 1
            Next
        End If
        b = d.keys
        ReDim c(UBound(b))
        For i = 0 To UBound(b)
            c(i) = b(i) & vbTab & d(b(i))
        Next
        If TF = True Then ActiveDocument.Undo 1
       
        With Documents.Add.Content
            .Text = "文档共有" & n & "个中文字符。共提取到" & d.Count _
                & IIf(a = 6, "个中文词语", "个不同的汉字") & ",其出现次数分别为:" & vbCrLf & Join(c, vbCrLf)
            .Parent.DefaultTabStop = .Characters.First.Font.Size * 6
            .MoveStart wdParagraph
            .Sort , 2, wdSortFieldNumeric, wdSortOrderDescending, 1, , , , , , wdSortSeparateByTabs
        End With
        MsgBox "提取完毕。用时" & Format(Timer - st, "0") & "秒。"
        Application.ScreenUpdating = True
    End Sub

你可能感兴趣的:(词频统计)