Android从WebView中获取html,得到页面显示的文本

/**
 * Configures the WebView so that, once the bundled help page has finished
 * loading, the page's complete HTML is passed to
 * {@code InJavaScriptLocalObj.getSource(String)} through the {@code java_obj}
 * JavaScript bridge.
 *
 * <p>All configuration (JavaScript, bridge object, client) is applied before
 * the page load is started, because an injected bridge object only becomes
 * visible to scripts of pages loaded after it was added.
 */
private void initView() {
    WebView mWebView = findViewById(R.id.webview);

    // JavaScript must be enabled, otherwise the javascript: URL issued in
    // onPageFinished is never executed.
    WebSettings mSetting = mWebView.getSettings();
    mSetting.setJavaScriptEnabled(true);

    // Expose the bridge object to page scripts as window.java_obj.
    // NOTE(review): the bridge is reachable from every script running in this
    // WebView; only load trusted content (here: a file from the app's assets).
    mWebView.addJavascriptInterface(new InJavaScriptLocalObj(), "java_obj");

    // When the page has finished loading, have the page call back into Java
    // with its own serialized DOM.
    mWebView.setWebViewClient(new WebViewClient() {
        @Override
        public void onPageFinished(WebView view, String url) {
            // The trailing void(0) keeps the javascript: URL from replacing the
            // document with the expression's result.
            view.loadUrl("javascript:window.java_obj.getSource(document.documentElement.outerHTML);void(0)");
            super.onPageFinished(view, url);
        }
    });

    // Start loading last, after the settings, bridge and client are in place.
    mWebView.loadUrl("file:///android_asset/book/GUID-0B7E066B-D444-4C7C-918F-339F9D7B5319.html");
}
在 getSource 方法中解析传入的 HTML,去掉样式、标题、图片名称和表格后得到页面正文文本:
/**
 * Bridge object exposed to page JavaScript (registered on the WebView under
 * the name {@code java_obj}). It receives the page's HTML and reduces it to
 * the text that should be spoken.
 */
public final class InJavaScriptLocalObj {
    /**
     * Extracts the readable text of the page and stores it in {@code speechText}.
     *
     * <p>The HTML is parsed with jsoup and the unwanted parts are removed as DOM
     * elements (title, style/script blocks, image captions marked with the
     * {@code guid-img} class, and tables). Working on the DOM avoids feeding
     * page text into regular expressions, where characters such as
     * {@code ( ) ? + [ ]} would be interpreted as pattern syntax, and avoids
     * guessing how many CSS rules the style block contains.
     *
     * <p>As before, the resulting text has all whitespace removed.
     *
     * <p>NOTE(review): methods invoked through a JavaScript interface are
     * documented to run on a WebView background thread, not the UI thread;
     * confirm that readers of {@code speechText} account for that.
     *
     * @param html the complete HTML of the loaded page; {@code null} yields an
     *             empty result
     */
    // The annotation is mandatory: without it the method is not callable from JavaScript.
    @JavascriptInterface
    public void getSource(String html) {
        if (html == null) {
            speechText = "";
            Log.i("YYYY","final text = " + speechText);
            return;
        }

        // Parse as a full document (the input is documentElement.outerHTML).
        Document doc = Jsoup.parse(html);

        // Drop the title and any style/script blocks.
        doc.select("title, style, script").remove();

        // Drop image names, logging each one as it is removed.
        Elements imgs = doc.getElementsByClass("guid-img");
        for (int i = 0; i < imgs.size(); i++) {
            Log.i("YYYY","delete pic name " + imgs.get(i).text());
        }
        imgs.remove();

        // Drop every table, each one independently of the others.
        doc.select("table").remove();

        // What is left is the page's visible text; strip all whitespace so the
        // output keeps the same compact form as before.
        speechText = doc.text().replaceAll("\\s+", "");

        // Final result.
        Log.i("YYYY","final text = " + speechText);
    }
}
上面的代码使用 jsoup 解析 HTML 并提取标签内容,需要在 app 模块的 build.gradle 中添加依赖:
implementation 'org.jsoup:jsoup:1.9.2' 

你可能感兴趣的:(html,Android)