Parse JSON to an xmerl-Compatible XML Tree via a Simple XML State Machine

In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine to parse iCalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:

-module(json_parser).

-define(stateMachine, fun xml_sm:state/2).

-export([parse/1]).

-export([test/0]).


%% parse(Input) -> Result | {error, Reason}
%%
%% Entry point. Input is the JSON text as a binary or as a character list.
%% A {startDocument} event is sent to ?stateMachine, the text is walked by
%% parse/2, and finally an {endDocument} event is sent; whatever the state
%% machine returns for {endDocument} is the result (test/0 in this module
%% matches it against {ok, Xml}).
%%
%% If parse/2 rejects the text it returns {error, Reason}; that tuple is
%% returned unchanged instead of being handed to the state machine as if it
%% were a state.
%% NOTE(review): this assumes a state value produced by xml_sm is never
%% itself a tuple of the form {error, _} -- confirm against xml_sm.
parse(Bin) when is_binary(Bin) ->
    parse(binary_to_list(Bin));
parse(Text) ->
    States1 = ?stateMachine({startDocument}, undefined),
    case parse(skip_ws(Text), States1) of
        {error, _Reason} = Error ->
            Error;
        States2 ->
            ?stateMachine({endDocument}, States2)
    end.

%% parse(Chars, States) -> NewStates | {error, "invalid expression"}
%%
%% Walks the JSON text and feeds SAX-style events to ?stateMachine,
%% dispatching on the next structural character:
%%
%%   {   startElement for 'obj', then startElement for the first member name
%%   ,   endElement for the current member, then startElement for the next
%%       member name
%%   :   a quoted value or a bare value (number, true, false, null, ...) is
%%       sent as a characters event; an object or array value is parsed by
%%       the clauses for '{' and '['
%%   }   endElement for the current member, then endElement for 'obj'
%%   [ ] produce no events; the objects inside an array simply become
%%       sibling 'obj' elements of the enclosing member
%%
%% Known limitations of this event scheme:
%%   * an empty object ("{}") is rejected as an invalid expression;
%%   * arrays of scalars are rejected as an invalid expression;
%%   * a ',' directly after '}' is consumed without an event, so an object
%%     used directly as a member value (not inside an array) and followed by
%%     more members is rejected as an invalid expression.
%%
%% Any character that no clause understands yields the error tuple. The
%% previous catch-all clause called itself again with the same input in that
%% situation and therefore never terminated.
parse([], States) ->
    States;
parse([$, | T], States) ->
    %% A comma closes the member that was being read and opens the next one.
    States1 = ?stateMachine({endElement, [], undefined, []}, States),
    parse_member(skip_ws(T), States1);
parse([${ | T], States) ->
    States1 = ?stateMachine({startElement, [], obj, [], []}, States),
    parse_member(skip_ws(T), States1);
parse([$} | T], States) ->
    %% Close the last member of the object, then the object itself.
    States1 = ?stateMachine({endElement, [], undefined, []}, States),
    States2 = ?stateMachine({endElement, [], obj, []}, States1),
    case skip_ws(T) of
        [$, | T1] ->
            %% Skip the ',' that follows '}' to avoid a redundant endElement
            %% event.
            parse(skip_ws(T1), States2);
        Chars ->
            parse(Chars, States2)
    end;
parse([$: | T], States) ->
    case skip_ws(T) of
        [Quote | T1] when Quote =:= $"; Quote =:= $' ->
            %% Quoted string value.
            {Rest, Value} = parse_string(T1, [], Quote),
            States1 = ?stateMachine({characters, Value}, States),
            parse(skip_ws(Rest), States1);
        [H1 | _] = Chars when H1 =/= ${, H1 =/= $[ ->
            %% Bare value; its text is passed on verbatim.
            {Rest, Value} = parse_number(Chars, []),
            States1 = ?stateMachine({characters, Value}, States),
            parse(skip_ws(Rest), States1);
        Chars ->
            %% A nested object or array (or the end of the input).
            parse(Chars, States)
    end;
parse([$[ | T], States) ->
    parse(skip_ws(T), States);
parse([$] | T], States) ->
    parse(skip_ws(T), States);
parse([H | T], States) when H =< 32 ->
    %% Leading whitespace that a caller did not strip.
    parse(skip_ws(T), States);
parse(_Chars, _States) ->
    {error, "invalid expression"}.

%% parse_member(Chars, States) -> NewStates | {error, "invalid expression"}
%%
%% Reads the quoted member name at the head of Chars, sends a startElement
%% event named after it and continues parsing behind the name. Whitespace
%% directly after the opening quote is not part of the name.
%%
%% SECURITY: the name is turned into an atom with list_to_atom/1. Atoms are
%% never garbage collected, so documents from untrusted sources with many
%% distinct member names can exhaust the atom table.
parse_member([Quote | T], States) when Quote =:= $"; Quote =:= $' ->
    {Rest, Name} = parse_string(skip_ws(T), [], Quote),
    States1 = ?stateMachine({startElement, [], list_to_atom(Name), [], []}, States),
    parse(skip_ws(Rest), States1);
parse_member(_Chars, _States) ->
    {error, "invalid expression"}.


%% skip_ws(Chars) -> Rest
%%
%% Drops every leading element that compares less than or equal to the space
%% character (code 32) and returns what is left. Anything that does not
%% start with such an element, including the empty list, is returned
%% unchanged.
skip_ws([Char | Rest]) when Char =< $\s ->
    skip_ws(Rest);
skip_ws(Remaining) ->
    Remaining.

%% parse_string(Chars, Acc, Quote) -> {Rest, String}
%%
%% Collects characters up to the closing Quote character. Chars is the text
%% just behind the opening quote, Acc the characters collected so far in
%% reverse order (callers pass []). Returns the text behind the closing
%% quote together with the string contents.
%%
%% Backslash escapes are honoured, so an escaped quote no longer terminates
%% the string: \" \' \\ \/ yield the escaped character, \n \t \r \b \f the
%% corresponding control character. Any other escape (for example \uXXXX)
%% is kept as written, backslash included.
%%
%% Raises error:unterminated_string when the input ends before the closing
%% quote is found.
parse_string([], _Acc, _Quote) ->
    erlang:error(unterminated_string);
parse_string([H | T], Acc, Quote) when H == Quote ->
    {T, lists:reverse(Acc)};
parse_string([$\\, Escaped | T], Acc, Quote) ->
    parse_string(T, unescape_char(Escaped, Acc), Quote);
parse_string([H | T], Acc, Quote) ->
    parse_string(T, [H | Acc], Quote).

%% Pushes the character(s) denoted by the escape sequence "\" ++ [C] onto the
%% reversed accumulator Acc.
unescape_char($n, Acc) -> [$\n | Acc];
unescape_char($t, Acc) -> [$\t | Acc];
unescape_char($r, Acc) -> [$\r | Acc];
unescape_char($b, Acc) -> [$\b | Acc];
unescape_char($f, Acc) -> [$\f | Acc];
unescape_char(C, Acc) when C =:= $"; C =:= $'; C =:= $\\; C =:= $/ ->
    [C | Acc];
unescape_char(C, Acc) ->
    %% Unknown escape: keep it verbatim (Acc is reversed, so C goes first).
    [C, $\\ | Acc].

%% parse_number(Chars, Acc) -> {Rest, Text}
%%
%% Collects the text of a bare (unquoted) value. Acc holds the characters
%% collected so far in reverse order (callers pass []). Collection stops,
%% without consuming the stopping character, at ',', '}' or ']', at
%% whitespace (any character with a code of 32 or less, the same rule
%% skip_ws/1 uses), or at the end of the input.
%%
%% Stopping at whitespace keeps blanks and line breaks between the value and
%% the following delimiter out of Text; stopping at the end of the input
%% avoids a function_clause crash on truncated documents.
parse_number([], Acc) ->
    {[], lists:reverse(Acc)};
parse_number([H | _] = Rest, Acc) when H == $,; H == $}; H == $]; H =< 32 ->
    {Rest, lists:reverse(Acc)};
parse_number([H | T], Acc) ->
    parse_number(T, [H | Acc]).



%% test() -> ok
%%
%% Demo: parses the sample document below, requires the result to be
%% {ok, Tree}, exports Tree with xmerl:export_simple/2 using the xmerl_xml
%% callback module and prints the flattened export with ~p.
test() ->
    Json = "
{'businesses': [{'address1': '650 Mission Street',
                 'address2': '',
                 'avg_rating': 4.5,
                 'categories': [{'category_filter': 'localflavor',
                                 'name': 'Local Flavor',
                                 'search_url': 'http://lightpole.net/search'}],
                 'city': 'San Francisco',
                 'distance': 0.085253790020942688,
                 'id': '4kMBvIEWPxWkWKFN__8SxQ',
                 'latitude': 37.787185668945298,
                 'longitude': -122.40093994140599},
                {'address1': '25 Maiden Lane',
                 'address2': '',
                 'avg_rating': 5.0,
                 'categories': [{'category_filter': 'localflavor',
                                 'name': 'Local Flavor',
                                 'search_url': 'http://lightpole.net/search'}],
                 'city': 'San Francisco',
                 'distance': 0.23186808824539185,
                 'id': 'O1zPF_b7RyEY_NNsizX7Yw',
                 'latitude': 37.788387,
                 'longitude': -122.40401}]}
",
    {ok, Tree} = parse(Json),
    Exported = xmerl:export_simple([Tree], xmerl_xml),
    io:fwrite(user, "Parsed: ~n~p~n", [lists:flatten(Exported)]).

The result will be something like:

<?xml version=\"1.0\"?>
<obj>
  <businesses>
    <obj>
      <address1>650 Mission Street</address1>
      <address2></address2>
      <avg_rating>4.5</avg_rating>
      <categories>
        <obj>
          <category_filter>localflavor</category_filter>
          <name>Local Flavor</name>
          <search_url>http://lightpole.net/search</search_url>
        </obj>
      </categories>
      <city>San Francisco</city>
      <distance>0.085253790020942688</distance>
      <id>4kMBvIEWPxWkWKFN__8SxQ</id>
      <latitude>37.787185668945298</latitude>
      <longitude>-122.40093994140599</longitude>
    </obj>
    <obj>
      <address1>25 Maiden Lane</address1>
      <address2></address2>
      <avg_rating>5.0</avg_rating>
      <categories>
        <obj>
          <category_filter>localflavor</category_filter>
          <name>Local Flavor</name>
          <search_url>http://lightpole.net/search</search_url>
        </obj>
      </categories>
      <city>San Francisco</city>
      <distance>0.23186808824539185</distance>
      <id>O1zPF_b7RyEY_NNsizX7Yw</id>
      <latitude>37.788387</latitude>
      <longitude>-122.40401</longitude>
    </obj>
  </businesses>
</obj>

Now you can fetch an element with:

> [Latitude1] = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Xml),
> Latitude1#xmlText.value.
"37.787185668945298"

Next time, I'll write a simple Erlang data state machine, which will parse iCalendar and JSON into plain Erlang lists and tuples.

The code of xml_sm.erl can be found in my previous blog.

你可能感兴趣的:(xml,json,.net,erlang,REST)