Updated Aug 16: Fix bugs when json is an array. Add a 'json:root' element always since valid xml should have a root. Remove 'obj' tag that is not necessary.
Updated Aug 15: A more complete json_parser.erl. Thanks for tonyg's beautiful work, fixed some bugs.
Updated Aug 5: rewrote json_parser.erl based on tonyg's RFC4627 implementation, fixed some bugs.
In my previous blog post, A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo, I wrote a simple state machine to parse icalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:
%%---------------------------------------------------------------------------
%% Copyright (c) 2007 Tony Garnock-Jones <[email protected]>
%% Copyright (c) 2007 LShift Ltd. <[email protected]>
%% Copyright (c) 2007 LightPole, Inc.
%%
%% Permission is hereby granted, free of charge, to any person
%% obtaining a copy of this software and associated documentation
%% files (the "Software"), to deal in the Software without
%% restriction, including without limitation the rights to use, copy,
%% modify, merge, publish, distribute, sublicense, and/or sell copies
%% of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be
%% included in all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
%% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
%% BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
%% ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
%% CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
%% SOFTWARE.
%%---------------------------------------------------------------------------
%%
%% JSON parser that reports what it reads as SAX-style events to a
%% pluggable state-machine fun (see the `machine' field of #context{}).

-module(json_parser).

-define(stateMachine, fun xml_sm:state/2).

%% Namespace used for the synthetic elements this parser adds.
-define(JsonNSUri, "http://www.lightpole.net/xmlns/1.0").
-define(JsonNSAtrr, {'xmlns:json', ?JsonNSUri}).
-define(JsonNSRoot, 'json:root').
-define(JsonNSArray, 'json:array').

%% machine: fun(Event, State) -> NewState, the event sink.
%% qname:   element name used for the items of the array being parsed.
-record(context, {machine, qname}).

-export([parse_to_xml/1, parse_to_poet/1]).
-export([test/0]).

%% @doc Parse JSON text (binary or character list), sending the events
%% to xml_sm:state/2. Returns whatever that machine yields for the
%% final {endDocument} event.
-spec parse_to_xml(binary() | string()) -> term().
parse_to_xml(Data) ->
    XmlContext = #context{machine = fun xml_sm:state/2},
    parse(Data, XmlContext).

%% @doc Same as parse_to_xml/1, but the events go to poet_sm:state/2.
-spec parse_to_poet(binary() | string()) -> term().
parse_to_poet(Data) ->
    PoetContext = #context{machine = fun poet_sm:state/2},
    parse(Data, PoetContext).
%% @doc Run a complete parse: emit {startDocument}, parse the root value,
%% then emit {endDocument} and return the machine's answer to it.
%% Binaries are converted to a character list first.
parse(Bin, Context) when is_binary(Bin) ->
    parse(binary_to_list(Bin), Context);
parse(Str, #context{machine = MachineFun} = Context) ->
    State1 = MachineFun({startDocument}, undefined),
    State2 = parse_root(skip_ws(Str), State1, Context),
    MachineFun({endDocument}, State2).

%% @doc Parse the top-level value. Since a valid XML document needs a
%% single root element, the whole JSON text is wrapped in 'json:root'.
%% Only an object or an array is accepted at the top level; anything
%% else fails with function_clause.
parse_root([${ | T], State, Context) ->
    parse_rooted(fun parse_object/3, T, State, Context#context{qname = undefined});
parse_root([$[ | T], State, Context) ->
    %% Items of a top-level array have no key to borrow a name from,
    %% so they are emitted as 'json:array' elements.
    parse_rooted(fun parse_array/3, T, State, Context#context{qname = ?JsonNSArray}).

%% Wrap the events produced by ParseBody between the start and end
%% events of the root element. Input left after the root value is ignored.
parse_rooted(ParseBody, T, State, #context{machine = MachineFun} = Context) ->
    State1 = MachineFun(
        {startElement, ?JsonNSUri, root, ?JsonNSRoot, [?JsonNSAtrr]}, State
    ),
    {_Rest, State2} = ParseBody(skip_ws(T), State1, Context),
    MachineFun({endElement, ?JsonNSUri, root, ?JsonNSRoot}, State2).

%% @doc Parse the members of an object; the opening `{' has already been
%% consumed. Returns {Rest, State} once the closing `}' is reached.
%% Each member becomes an element named after its key; a member whose
%% value is an array becomes one such element per array item.
parse_object([$} | T], State, _Context) ->
    {T, State};
parse_object([$, | T], State, Context) ->
    parse_object(skip_ws(T), State, Context);
parse_object([$" | T], State, Context) ->
    %% T is already the inside of the key string: whitespace here is part
    %% of the key and must not be skipped.
    {Rest, ObjNameStr} = parse_string(T, []),
    %% NOTE(review): list_to_atom/1 on keys taken from untrusted JSON can
    %% exhaust the atom table. Left as is because the events carry the
    %% element name as an atom.
    ObjName = list_to_atom(ObjNameStr),
    Context1 = Context#context{qname = ObjName},
    [$: | T1] = skip_ws(Rest),
    {Rest1, State1} = parse_member_value(skip_ws(T1), ObjName, State, Context1),
    parse_object(skip_ws(Rest1), State1, Context1).

%% Parse the value of the member named ObjName.
parse_member_value([$[ | T], _ObjName, State, Context) ->
    %% An array value yields a list of elements, each named ObjName
    %% (carried in Context's qname), instead of one wrapping element.
    parse_array(skip_ws(T), State, Context);
parse_member_value(Chars, ObjName, State, #context{machine = MachineFun} = Context) ->
    State1 = MachineFun({startElement, "", ObjName, ObjName, []}, State),
    {Rest, State2} = parse_value(Chars, State1, Context),
    State3 = MachineFun({endElement, "", ObjName, ObjName}, State2),
    {Rest, State3}.
%% @doc Parse the items of an array; the opening `[' has already been
%% consumed. Every item is wrapped in an element named after the
%% context's qname. Returns {Rest, State} once `]' is reached.
%% Exits with syntax_error if the input ends before the closing `]'.
parse_array([$] | T], State, _Context) ->
    {T, State};
parse_array([$, | T], State, Context) ->
    parse_array(skip_ws(T), State, Context);
parse_array([], _State, _Context) ->
    %% Without this clause an unterminated array would keep emitting
    %% empty elements forever, since parse_value/3 accepts empty input.
    exit(syntax_error);
parse_array(Chars, State, #context{machine = MachineFun, qname = QName} = Context) ->
    State1 = MachineFun({startElement, "", QName, QName, []}, State),
    {Rest, State2} = parse_value(Chars, State1, Context),
    State3 = MachineFun({endElement, "", QName, QName}, State2),
    parse_array(skip_ws(Rest), State3, Context).

%% @doc Parse one JSON value and return {Rest, State}.
%% Literals, strings and numbers are sent to the machine as a
%% {characters, Text} event; objects and arrays are delegated to
%% parse_object/3 and parse_array/3. Exits with syntax_error when the
%% input is none of these.
parse_value([], State, _Context) ->
    {[], State};
parse_value("true" ++ T, State, #context{machine = MachineFun}) ->
    {T, MachineFun({characters, "true"}, State)};
parse_value("false" ++ T, State, #context{machine = MachineFun}) ->
    {T, MachineFun({characters, "false"}, State)};
parse_value("null" ++ T, State, #context{machine = MachineFun}) ->
    {T, MachineFun({characters, "null"}, State)};
parse_value([$" | T], State, #context{machine = MachineFun}) ->
    {Rest, Value} = parse_string(T, []),
    {Rest, MachineFun({characters, Value}, State)};
parse_value([${ | T], State, Context) ->
    parse_object(skip_ws(T), State, Context);
parse_value([$[ | T], State, Context) ->
    parse_array(skip_ws(T), State, Context);
parse_value(Chars, State, #context{machine = MachineFun}) ->
    case parse_number(skip_ws(Chars), []) of
        {_Unconsumed, []} ->
            %% Nothing was recognised. Returning here would consume no
            %% input and make parse_array/3 spin on the same character.
            exit(syntax_error);
        {Rest, Value} ->
            {Rest, MachineFun({characters, Value}, State)}
    end.

%% @doc Read string contents up to the closing quote; the opening quote
%% has already been consumed. Acc holds the characters read so far in
%% reverse order. Returns {Rest, String}.
parse_string([$" | T], Acc) ->
    {T, lists:reverse(Acc)};
parse_string([$\\, Key | T], Acc) ->
    parse_escaped_char(Key, T, Acc);
parse_string([H | T], Acc) ->
    parse_string(T, [H | Acc]).
%% @doc Handle the character following a backslash inside a string, then
%% continue with parse_string/2. Acc is the reversed string read so far.
%% An escape not listed here fails with function_clause.
parse_escaped_char($b, Rest, Acc) -> parse_string(Rest, [8 | Acc]);
parse_escaped_char($t, Rest, Acc) -> parse_string(Rest, [9 | Acc]);
parse_escaped_char($n, Rest, Acc) -> parse_string(Rest, [10 | Acc]);
parse_escaped_char($f, Rest, Acc) -> parse_string(Rest, [12 | Acc]);
parse_escaped_char($r, Rest, Acc) -> parse_string(Rest, [13 | Acc]);
parse_escaped_char($/, Rest, Acc) -> parse_string(Rest, [$/ | Acc]);
parse_escaped_char($\\, Rest, Acc) -> parse_string(Rest, [$\\ | Acc]);
parse_escaped_char($", Rest, Acc) -> parse_string(Rest, [$" | Acc]);
parse_escaped_char($u, [D0, D1, D2, D3 | Rest], Acc) ->
    %% \uXXXX: the four hex digits form one 16-bit value.
    Code =
        (digit_hex(D0) bsl 12) + (digit_hex(D1) bsl 8) +
        (digit_hex(D2) bsl 4) + digit_hex(D3),
    parse_string(Rest, [Code | Acc]).

%% @doc Value (0..15) of one hexadecimal digit character, either case.
%% Fails with function_clause for any other input.
digit_hex(C) when is_integer(C), C >= $0, C =< $9 -> C - $0;
digit_hex(C) when is_integer(C), C >= $A, C =< $F -> C - $A + 10;
digit_hex(C) when is_integer(C), C >= $a, C =< $f -> C - $a + 10.

%% @doc Finish a number: Acc holds its characters in reverse order.
%% The number is returned as text, not converted to an integer or float,
%% so it reaches the state machine exactly as accumulated.
finish_number(Rest, Acc) ->
    {Rest, lists:reverse(Acc)}.

%% @doc Parse a JSON number from the head of the input.
%% Returns {Rest, NumberText}. Exits with syntax_error when the input
%% does not start with a digit, optionally preceded by `-'.
parse_number([], _Acc) ->
    exit(syntax_error);
parse_number([$- | T], Acc) ->
    parse_number1(T, [$- | Acc]);
parse_number(Rest, Acc) ->
    parse_number1(Rest, Acc).

%% Integer part, then an optional fraction and an optional exponent.
%% At least one leading digit is required: accepting none would return
%% an empty number without consuming anything from the input.
parse_number1([Ch | _] = Chars, Acc) when is_integer(Ch), Ch >= $0, Ch =< $9 ->
    {Acc1, Rest1} = parse_int_part(Chars, Acc),
    case Rest1 of
        [] ->
            finish_number([], Acc1);
        [$. | More] ->
            {Acc2, Rest2} = parse_int_part(More, [$. | Acc1]),
            parse_exp(Rest2, Acc2, false);
        _ ->
            %% No fraction was seen; parse_exp/3 gets `true' so that it
            %% can add one if an exponent follows.
            parse_exp(Rest1, Acc1, true)
    end;
parse_number1(_NotANumber, _Acc) ->
    exit(syntax_error).

%% @doc Move a (possibly empty) run of leading digits onto Acc.
%% Returns {Acc1, Rest} -- accumulator first, unlike the other parsers.
parse_int_part([], Acc) ->
    {Acc, []};
parse_int_part([Ch | Rest] = Chars, Acc) ->
    case is_digit(Ch) of
        true -> parse_int_part(Rest, [Ch | Acc]);
        false -> {Acc, Chars}
    end.
%% @doc Parse an optional exponent and finish the number.
%% Acc is the reversed number text read so far. NeedFrac is true when no
%% fraction was read, in which case ".0" is inserted before the exponent.
%% Returns {Rest, NumberText}.
parse_exp([$e | T], Acc, NeedFrac) -> parse_exp1(T, Acc, NeedFrac);
parse_exp([$E | T], Acc, NeedFrac) -> parse_exp1(T, Acc, NeedFrac);
parse_exp(Rest, Acc, _NeedFrac) -> finish_number(Rest, Acc).

%% The exponent marker is always written as a lowercase `e'.
parse_exp1(Rest, Acc, NeedFrac) ->
    {Acc1, Rest1} = parse_signed_int_part(Rest, exp_prefix(NeedFrac, Acc)),
    finish_number(Rest1, Acc1).

%% Push "e", or ".0e" when the mantissa had no fraction, onto the
%% reversed accumulator.
exp_prefix(true, Acc) -> [$e, $0, $. | Acc];
exp_prefix(false, Acc) -> [$e | Acc].

%% @doc Like parse_int_part/2, but accepts one leading `+' or `-'.
parse_signed_int_part([$+ | T], Acc) -> parse_int_part(T, [$+ | Acc]);
parse_signed_int_part([$- | T], Acc) -> parse_int_part(T, [$- | Acc]);
parse_signed_int_part(Rest, Acc) -> parse_int_part(Rest, Acc).

%% @doc True for the characters `0' to `9' only.
is_digit(C) when is_integer(C) andalso C >= $0 andalso C =< $9 -> true;
is_digit(_) -> false.

%% @doc Drop leading whitespace. Every character up to and including
%% space (32) is treated as whitespace.
skip_ws([H | T]) when H =< 32 -> skip_ws(T);
skip_ws(Chars) -> Chars.

%% @doc Manual smoke test: parses three sample documents and prints the
%% results to `user'. Needs xml_sm, poet_sm and xmerl; the matches below
%% expect those machines to answer {ok, Result}.
test() ->
    Text1 = "{\"firstname\":\"Caoyuan\", \"iq\":\"150\"}",
    {ok, Xml1} = parse_to_xml(Text1),
    XmlText1 = lists:flatten(xmerl:export_simple([Xml1], xmerl_xml)),
    io:fwrite(user, "Parsed XML: ~n~p~n", [XmlText1]),

    {ok, Poet1} = parse_to_poet(Text1),
    io:fwrite(user, "Parsed POET: ~n~p~n", [Poet1]),

    Text2 =
        "[{\"firstname\":\"Caoyuan\", \"iq\":\"150\"}, "
        "{\"firstname\":\"Haobo\", \"iq\":150}]",
    {ok, Xml2} = parse_to_xml(Text2),
    XmlText2 = lists:flatten(xmerl:export_simple([Xml2], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText2]),

    Text =
        " {\"businesses\": [{\"address1\": \"650 Mission Street\", "
        "\"address2\": \"\", "
        "\"avg_rating\": 4.5, "
        "\"categories\": [{\"category_filter\": \"localflavor\", "
        "\"name\": \"Local Flavor\", "
        "\"search_url\": \"http://lightpole.net/search\"}], "
        "\"city\": \"San Francisco\", "
        "\"distance\": 0.085253790020942688, "
        "\"id\": \"4kMBvIEWPxWkWKFN__8SxQ\", "
        "\"latitude\": 37.787185668945298, "
        "\"longitude\": -122.40093994140599}, "
        "{\"address1\": \"25 Maiden Lane\", "
        "\"address2\": \"\", "
        "\"avg_rating\": 5.0, "
        "\"categories\": [{\"category_filter\": \"localflavor\", "
        "\"name\": \"Local Flavor\", "
        "\"search_url\": \"http://lightpole.net/search\"}], "
        "\"city\": \"San Francisco\", "
        "\"distance\": 0.23186808824539185, "
        "\"id\": \"O1zPF_b7RyEY_NNsizX7Yw\", "
        "\"latitude\": \n37.788387, "
        "\"longitude\": -122.40401}]} ",
    {ok, Xml} = parse_to_xml(Text),
    XmlText = lists:flatten(xmerl:export_simple([Xml], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [XmlText]),

    %% The root element is 'json:root', so the path must use the `json'
    %% prefix.
    Latitude1 = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml),
    io:format(user, "Latitude1: ~p~n", [Latitude1]).
The result will be something like:
<?xml version="1.0"?> <json:root xmlns:json="http://www.lightpole.net/xmlns/1.0"> <businesses> <address1>650 Mission Street</address1> <address2></address2> <avg_rating>4.5</avg_rating> <categories> <category_filter>localflavor</category_filter> <name>Local Flavor</name> <search_url>http://lightpole.net/search</search_url> </categories> <city>San Francisco</city> <distance>0.085253790020942688</distance> <id>4kMBvIEWPxWkWKFN__8SxQ</id> <latitude>37.787185668945298</latitude> <longitude>-122.40093994140599</longitude> </businesses> <businesses> <address1>25 Maiden Lane</address1> <address2></address2> <avg_rating>5.0</avg_rating> <categories> <category_filter>localflavor</category_filter> <name>Local Flavor</name> <search_url>http://lightpole.net/search</search_url> </categories> <city>San Francisco</city> <distance>0.23186808824539185</distance> <id>O1zPF_b7RyEY_NNsizX7Yw</id> <latitude>37.788387</latitude> <longitude>-122.40401</longitude> </businesses> </json:root>
Now you can fetch an element with:
> [Latitude1] = xmerl_xpath:string("/json:root/businesses[1]/latitude/text()", Xml), > Latitude1#xmlText.value. "37.787185668945298"
Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.
The code of xml_sm.erl can be found in my previous blog.