Parse JSON to xmerl Compatible XML Tree via A Simple XML State Machine

Updated Aug 5: rewrote json_parser.erl based on tonyg's RFC4627 implementation and fixed some bugs.

In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine to parse icalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:

%% JSON parser that reports what it reads as events to the xml_sm state
%% machine instead of building a result structure itself.
-module(json_parser).

%% Every parsing event is handed to xml_sm:state/2, which lives in a separate
%% module. The macro is applied directly: ?stateMachine(Event, States).
-define(stateMachine, fun xml_sm:state/2).

%% Public API.
-export([parse/1]).

%% Demo entry point.
-export([test/0]).


%% @doc Parse JSON text by driving the xml_sm state machine.
%%
%% Accepts a binary (converted to a character list first) or a character
%% list. Emits startDocument, then the events for the top-level value, then
%% endDocument, and returns whatever the state machine returns for
%% endDocument (test/0 in this module matches that result against {ok, Xml}).
%%
%% NOTE(review): only empty input, or a top-level object that consumes the
%% whole input, gives a bare state here. For any other top-level value, or
%% for trailing non-whitespace after the object, parse_value/2 returns
%% {Rest, States} and that tuple is passed to the state machine unchanged -
%% confirm how xml_sm:state/2 reacts to it.
parse(Bin) when is_binary(Bin) ->
    parse(binary_to_list(Bin));
parse(Text) ->
    States1 = ?stateMachine({startDocument}, undefined),
    States2 = parse_value(skip_ws(Text), States1),
    %% The endDocument result is the return value; binding it to a variable
    %% only produced an "unused variable" compiler warning.
    ?stateMachine({endDocument}, States2).


%% Parse the JSON value at the head of the input, which callers pass with
%% leading whitespace already skipped.
%%
%% Returns the bare state for empty input. Strings, arrays and bare literals
%% return {Remaining, States}. Objects return whatever parse_object/2
%% returns. Strings may be delimited by double or single quotes. Anything
%% that is not a string, object or array is read as a bare literal by
%% parse_number/2 and reported as character data.
parse_value([], States) ->
    States;
parse_value([Quote | Tail], States) when Quote == $"; Quote == $' ->
    {Remaining, Text} = parse_string(Tail, [], Quote),
    {Remaining, ?stateMachine({characters, Text}, States)};
parse_value([${ | Tail], States) ->
    %% Every JSON object becomes an element named obj.
    Opened = ?stateMachine({startElement, [], obj, [], []}, States),
    parse_object(skip_ws(Tail), Opened);
parse_value([$[ | Tail], States) ->
    %% Arrays get no wrapper element; their items are emitted in sequence.
    parse_array(skip_ws(Tail), States);
parse_value(Input, States) ->
    {Remaining, Literal} = parse_number(skip_ws(Input), []),
    {Remaining, ?stateMachine({characters, Literal}, States)}.

%% Parse the members of a JSON object; the opening brace has already been
%% consumed and the obj element already started by the caller.
%%
%% Each member becomes an element named after its key, wrapping the events
%% of its value. On the closing brace the obj element is ended. The result
%% is the bare state when nothing but whitespace follows, otherwise
%% {Remaining, States}. Commas between members are simply skipped.
parse_object([$} | Tail], States) ->
    Closed = ?stateMachine({endElement, [], obj, []}, States),
    case skip_ws(Tail) of
        [] ->
            %% Input exhausted: this is the final result.
            Closed;
        Remaining ->
            {Remaining, Closed}
    end;
parse_object([$, | Tail], States) ->
    parse_object(skip_ws(Tail), States);
parse_object([Quote | Tail], States) when Quote == $"; Quote == $' ->
    %% Whitespace directly after the opening quote of a key is dropped.
    {AfterKey, Key} = parse_string(skip_ws(Tail), [], Quote),
    %% NOTE(review): list_to_atom/1 creates a new atom for every distinct
    %% key; atoms are never garbage collected, so untrusted input can
    %% exhaust the atom table.
    Opened = ?stateMachine({startElement, [], list_to_atom(Key), [], []}, States),
    [$: | AfterColon] = skip_ws(AfterKey),
    {AfterValue, Filled} = parse_value(skip_ws(AfterColon), Opened),
    Ended = ?stateMachine({endElement, [], undefined, []}, Filled),
    parse_object(skip_ws(AfterValue), Ended).

%% Parse the items of a JSON array; the opening bracket has already been
%% consumed. Items are parsed one after another with parse_value/2 and
%% commas are skipped. Returns {Remaining, States}, where Remaining is the
%% input after the closing bracket.
parse_array(Input, States) ->
    case Input of
        [$] | Tail] ->
            {Tail, States};
        [$, | Tail] ->
            parse_array(skip_ws(Tail), States);
        _ ->
            {Remaining, Updated} = parse_value(Input, States),
            parse_array(skip_ws(Remaining), Updated)
    end.

%% @doc Read a quoted string whose opening quote has already been consumed.
%%
%% Chars is the input after the opening quote, Acc the characters collected
%% so far (in reverse order) and Quote the delimiter that opened the string.
%% Returns {Rest, Value}: Rest is the input after the closing quote and
%% Value the decoded string.
%%
%% Backslash escapes are decoded, so an escaped quote no longer terminates
%% the string early: \b \f \n \r \t map to their control characters,
%% \uXXXX maps to the code point given by the four hex digits (surrogate
%% pairs are not combined), and any other escaped character - including
%% the quote, the backslash and the slash - stands for itself.
%%
%% Crashes with function_clause on an unterminated string and with badarg
%% when \u is not followed by four hex digits.
parse_string([Quote | Rest], Acc, Quote) ->
    {Rest, lists:reverse(Acc)};
parse_string([$\\, $u, A, B, C, D | Rest], Acc, Quote) ->
    parse_string(Rest, [list_to_integer([A, B, C, D], 16) | Acc], Quote);
parse_string([$\\, Escaped | Rest], Acc, Quote) ->
    parse_string(Rest, [unescape_char(Escaped) | Acc], Quote);
parse_string([Char | Rest], Acc, Quote) ->
    parse_string(Rest, [Char | Acc], Quote).

%% Map the character following a backslash to the character it denotes.
unescape_char($b) -> $\b;
unescape_char($f) -> $\f;
unescape_char($n) -> $\n;
unescape_char($r) -> $\r;
unescape_char($t) -> $\t;
unescape_char(Other) -> Other.

%% @doc Read a bare (unquoted) literal such as a number.
%%
%% Chars is the input with leading whitespace already skipped and Acc the
%% characters collected so far (in reverse order). Characters are collected
%% up to, but not including, the next comma, closing brace, closing bracket,
%% whitespace character or the end of the input. No validation is done, so
%% any unquoted token is accepted.
%%
%% Returns {Rest, Value}: Rest is the unconsumed input, starting at the
%% delimiter, and Value the literal's text.
%%
%% Stopping at whitespace keeps trailing blanks out of the value ("1 }" now
%% yields "1" rather than "1 "). The end-of-input clause lets a literal
%% that ends the input return instead of crashing with function_clause.
parse_number([], Acc) ->
    {[], lists:reverse(Acc)};
parse_number([H | _] = Chars, Acc) when H =:= $,; H =:= $}; H =:= $]; H =< 32 ->
    {Chars, lists:reverse(Acc)};
parse_number([H | T], Acc) ->
    parse_number(T, [H | Acc]).


%% Drop leading whitespace: every leading element that compares =< 32
%% (space and the control characters below it) is removed. Anything else,
%% including the empty list, is returned unchanged.
skip_ws(Chars) ->
    case Chars of
        [C | Rest] when C =< 32 -> skip_ws(Rest);
        _ -> Chars
    end.



%% Demo: parse a sample document (single-quoted, JSON-like), print the
%% exported XML text and then the result of an XPath query for the first
%% business's latitude. Output goes to the user device. Crashes with
%% badmatch unless parse/1 returns {ok, Tree}.
test() ->
    Json = "
{'businesses': [{'address1': '650 Mission Street',
                 'address2': '',
                 'avg_rating': 4.5,
                 'categories': [{'category_filter': 'localflavor',
                                 'name': 'Local Flavor',
                                 'search_url': 'http://lightpole.net/search'}],
                 'city': 'San Francisco',
                 'distance': 0.085253790020942688,
                 'id': '4kMBvIEWPxWkWKFN__8SxQ',
                 'latitude': 37.787185668945298,
                 'longitude': -122.40093994140599},
                {'address1': '25 Maiden Lane',
                 'address2': '',
                 'avg_rating': 5.0,
                 'categories': [{'category_filter': 'localflavor',
                                 'name': 'Local Flavor',
                                 'search_url': 'http://lightpole.net/search'}],
                 'city': 'San Francisco',
                 'distance': 0.23186808824539185,
                 'id': 'O1zPF_b7RyEY_NNsizX7Yw',
                 'latitude': 37.788387,
                 'longitude': -122.40401}]}
",
    {ok, Tree} = parse(Json),
    Exported = xmerl:export_simple([Tree], xmerl_xml),
    io:format(user, "Parsed: ~n~p~n", [lists:flatten(Exported)]),
    Matches = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Tree),
    io:format(user, "Latitude1: ~p~n", [Matches]).

The result will be something like:

<?xml version=\"1.0\"?>
<obj>
  <businesses>
    <obj>
      <address1>650 Mission Street</address1>
      <address2></address2>
      <avg_rating>4.5</avg_rating>
      <categories>
        <obj>
          <category_filter>localflavor</category_filter>
          <name>Local Flavor</name>
          <search_url>http://lightpole.net/search</search_url>
        </obj>
      </categories>
      <city>San Francisco</city>
      <distance>0.085253790020942688</distance>
      <id>4kMBvIEWPxWkWKFN__8SxQ</id>
      <latitude>37.787185668945298</latitude>
      <longitude>-122.40093994140599</longitude>
    </obj>
    <obj>
      <address1>25 Maiden Lane</address1>
      <address2></address2>
      <avg_rating>5.0</avg_rating>
      <categories>
        <obj>
          <category_filter>localflavor</category_filter>
          <name>Local Flavor</name>
          <search_url>http://lightpole.net/search</search_url>
        </obj>
      </categories>
      <city>San Francisco</city>
      <distance>0.23186808824539185</distance>
      <id>O1zPF_b7RyEY_NNsizX7Yw</id>
      <latitude>37.788387</latitude>
      <longitude>-122.40401</longitude>
    </obj>
  </businesses>
</obj>

Now you can fetch an element with:

> [Latitude1] = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Xml),
> Latitude1#xmlText.value.
"37.787185668945298"

Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.

The code of xml_sm.erl can be found in my previous blog.

你可能感兴趣的:(xml,json,.net,erlang,REST)