Snapchat 面试题:HTML Parser

题目:

给定一个 HTML 文件,生成一个包含所有 tag 的树状结构。可以假设 HTML 格式是合法的(valid):除了 <img> 这类没有结束标签的元素之外,每一个 tag 都一定有对应的 ending tag,而且 tag 的嵌套顺序也是合法的,不会出现 <div><span></div></span> 这种交叉嵌套的情况。

// This is the text editor interface. 
// Anything you type or change here will be seen by the other person in real time.

/*
<html>
    <head> </head>
    <body>
        <div>
            <div>
            </div>
            <div>
            </div>
        </div>
        <span>
        </span>
        <img>
    </body>
</html>
*/

import java.util.*;

/**
 * One node of the tag tree: a tag name plus the nodes nested directly inside it.
 */
class TreeNode {
    /** Tag text exactly as handed to the constructor. */
    String tag;

    /** Direct children in insertion order; starts out empty and mutable. */
    List<TreeNode> children = new ArrayList<>();

    /**
     * Creates a node with no children.
     *
     * @param t the tag text to store in {@link #tag}
     */
    TreeNode(String t) {
        this.tag = t;
    }
}

/**
 * Builds a tree of the element tags found in an HTML document.
 *
 * <p>The document is scanned once, left to right. Every start tag becomes a
 * {@link TreeNode} whose {@code tag} is the element name as written (attributes are
 * skipped), and the content of each element is parsed recursively to obtain its children.
 * Text, comments ({@code <!-- ... -->}), declarations such as {@code <!DOCTYPE ...>} and
 * a literal {@code '<'} that does not start a tag are ignored.
 *
 * <p>Nesting depth is limited by the thread's call stack because one recursive call is
 * made per open element.
 */
class HtmlParser {

    /** Lower-case names of the HTML void elements, i.e. elements that never have an end tag. */
    private static final Set<String> VOID_TAGS = new HashSet<>(Arrays.asList(
            "area", "base", "br", "col", "embed", "hr", "img", "input",
            "link", "meta", "source", "track", "wbr"));

    /**
     * Synthetic root (empty tag) of the most recent {@link #parser(String)} call; the
     * document's top-level elements are its children.
     */
    public TreeNode dummyRoot = new TreeNode("");

    /**
     * Parses {@code document} into a fresh tree.
     *
     * @param document the HTML text to parse
     * @return a synthetic root with an empty tag whose children are the top-level elements
     * @throws NullPointerException if {@code document} is {@code null}
     */
    public TreeNode parser(String document) {
        // Start from a new root on every call so that parsing a second document with the
        // same parser does not append to the tree of the first one.
        dummyRoot = new TreeNode("");
        findChildren(dummyRoot, document);
        return dummyRoot;
    }

    /**
     * Appends every top-level element of {@code document} to {@code root.children},
     * recursively filling in the descendants of each element.
     *
     * @param root     node that receives the top-level elements as children
     * @param document HTML fragment to scan
     * @throws NullPointerException if either argument is {@code null}
     */
    public void findChildren(TreeNode root, String document) {
        Objects.requireNonNull(root, "root");
        Objects.requireNonNull(document, "document");
        Deque<String> openTags = new ArrayDeque<>();
        parseChildren(root, document, 0, openTags);
    }

    /**
     * Parses the content of one element, adding each child element to {@code parent}.
     *
     * @param parent   node whose children are being collected
     * @param document the complete text being parsed
     * @param pos      index at which this element's content starts
     * @param openTags lower-case names of the currently open elements, innermost first;
     *                 empty while parsing the top level
     * @return the index at which the caller must resume scanning
     */
    private static int parseChildren(TreeNode parent, String document, int pos,
                                     Deque<String> openTags) {
        final int length = document.length();
        while (pos < length) {
            int lt = document.indexOf('<', pos);
            if (lt < 0) {
                break; // only text remains
            }
            if (document.startsWith("<!--", lt)) {
                int commentEnd = document.indexOf("-->", lt + 4);
                if (commentEnd < 0) {
                    break; // unterminated comment swallows the rest of the input
                }
                pos = commentEnd + 3;
                continue;
            }

            boolean closing = document.startsWith("</", lt);
            int nameStart = closing ? lt + 2 : lt + 1;
            if (nameStart >= length || !Character.isLetter(document.charAt(nameStart))) {
                // "<!DOCTYPE ...>", "<?xml ...?>" or a literal '<' in text: not an element.
                pos = lt + 1;
                continue;
            }
            int nameEnd = nameStart + 1;
            while (nameEnd < length && isNameChar(document.charAt(nameEnd))) {
                nameEnd++;
            }
            // Look for '>' only after the tag name, never before the '<' that opened it.
            int gt = findTagEnd(document, nameEnd);
            if (gt < 0) {
                break; // tag is cut off by the end of the input
            }

            String name = document.substring(nameStart, nameEnd);
            String key = name.toLowerCase(Locale.ROOT); // tag names are matched case-insensitively

            if (closing) {
                if (key.equals(openTags.peek())) {
                    return gt + 1; // end tag of the element being parsed
                }
                if (openTags.contains(key)) {
                    // End tag of an ancestor: leave it unconsumed so that each enclosing
                    // level returns in turn until the matching one consumes it.
                    return lt;
                }
                pos = gt + 1; // stray end tag with no open element: skip it
                continue;
            }

            TreeNode child = new TreeNode(name);
            parent.children.add(child);
            pos = gt + 1;

            boolean selfClosed = document.charAt(gt - 1) == '/'; // e.g. <br/>
            if (!selfClosed && !VOID_TAGS.contains(key)) {
                openTags.push(key);
                pos = parseChildren(child, document, pos, openTags);
                openTags.pop();
            }
        }
        return length;
    }

    /**
     * Returns the index of the {@code '>'} that ends the tag being read, ignoring any
     * {@code '>'} inside a quoted attribute value, or {@code -1} if there is none.
     */
    private static int findTagEnd(String document, int from) {
        char quote = 0;
        for (int i = from; i < document.length(); i++) {
            char c = document.charAt(i);
            if (quote != 0) {
                if (c == quote) {
                    quote = 0;
                }
            } else if (c == '"' || c == '\'') {
                quote = c;
            } else if (c == '>') {
                return i;
            }
        }
        return -1;
    }

    /** Returns whether {@code c} may appear in a tag name after its first letter. */
    private static boolean isNameChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_' || c == ':';
    }
}

public class Solution {
    /**
     * Parses a sample document and prints the resulting tag tree, one tag per line,
     * indented by nesting depth, so the outcome of the parse can actually be inspected.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HtmlParser p = new HtmlParser();
        String s = "<html><head></head><body><div><div></div><div></div></div><span></span><img></body></html>";
        TreeNode root = p.parser(s);
        // The returned root is a placeholder with an empty tag; print only the real elements.
        for (TreeNode top : root.children) {
            printTree(top, 0);
        }
    }

    /** Prints {@code node} and its descendants, indenting four spaces per nesting level. */
    private static void printTree(TreeNode node, int depth) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            line.append("    ");
        }
        System.out.println(line.append('<').append(node.tag).append('>'));
        for (TreeNode child : node.children) {
            printTree(child, depth + 1);
        }
    }
}

解法用了递归:每遇到一个开始标签就新建一个节点,然后对该标签内部的内容递归解析,得到它的子节点。


你可能感兴趣的:(Algorithm,面试题,Snapchat)