In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine that parses iCalendar into an xmerl-compatible XML tree. This time I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:
%%% Minimal JSON front end that turns a JSON text into SAX-style events
%%% ({startDocument}, {startElement, ...}, {characters, ...}, {endElement, ...},
%%% {endDocument}) and feeds them to xml_sm:state/2.
%%%
%%% Mapping used by this module:
%%%   * every JSON object becomes an <obj> element;
%%%   * every object member becomes an element named after its key;
%%%   * quoted strings and unquoted literals (numbers, true, false, null)
%%%     become the character data of the enclosing member element;
%%%   * array brackets emit no events, so only arrays of objects are
%%%     represented; a bare scalar inside an array is rejected.
%%%
%%% Both single- and double-quoted strings are accepted.
-module(json_parser).

-define(stateMachine, fun xml_sm:state/2).
-define(INVALID, {error, "invalid expression"}).

-export([parse/1]).
-export([test/0]).

%% parse(Input) -> Result | {error, "invalid expression"}
%%
%% Input is a binary or a character list holding the JSON text.
%% On success the result is whatever xml_sm:state/2 returns for {endDocument}
%% (test/0 in this module matches it against {ok, Xml}).
%% A syntax error is returned as-is and is never handed to the state machine
%% as if it were a state.
parse(Bin) when is_binary(Bin) ->
    parse(binary_to_list(Bin));
parse(Text) ->
    States0 = ?stateMachine({startDocument}, undefined),
    case parse(skip_ws(Text), States0) of
        {error, _} = Error ->
            Error;
        States1 ->
            ?stateMachine({endDocument}, States1)
    end.

%% parse(Chars, States) -> States1 | {error, "invalid expression"}
%%
%% Dispatches on the next significant character and emits the matching events.
parse([], States) ->
    States;
parse([$, | T], States) ->
    %% A ',' closes the current member element; the next member name follows.
    States1 = ?stateMachine({endElement, [], undefined, []}, States),
    parse_member_name(skip_ws(T), States1);
parse([${ | T], States) ->
    States1 = ?stateMachine({startElement, [], obj, [], []}, States),
    case skip_ws(T) of
        [$} | T1] ->
            %% Empty object: no member element was opened, only close <obj>.
            States2 = ?stateMachine({endElement, [], obj, []}, States1),
            after_object(T1, States2);
        Chars ->
            parse_member_name(Chars, States1)
    end;
parse([$} | T], States) ->
    %% Close the last member element first, then the object itself.
    States1 = ?stateMachine({endElement, [], undefined, []}, States),
    States2 = ?stateMachine({endElement, [], obj, []}, States1),
    after_object(T, States2);
parse([$: | T], States) ->
    case skip_ws(T) of
        [Q | T1] when Q == $"; Q == $' ->
            %% Quoted string value.
            case parse_string(T1, [], Q) of
                {Rest, Value} ->
                    States1 = ?stateMachine({characters, Value}, States),
                    parse(skip_ws(Rest), States1);
                error ->
                    ?INVALID
            end;
        [H | _] = Chars when H /= ${, H /= $[ ->
            %% Unquoted literal (number, true, false, null).
            case parse_number(Chars, []) of
                {Rest, Value} ->
                    States1 = ?stateMachine({characters, Value}, States),
                    parse(skip_ws(Rest), States1);
                error ->
                    ?INVALID
            end;
        Chars ->
            %% Nested object or array (or end of input).
            parse(Chars, States)
    end;
parse([$[ | T], States) ->
    %% Arrays emit no events of their own.
    parse(skip_ws(T), States);
parse([$] | T], States) ->
    parse(skip_ws(T), States);
parse([H | T], States) when H =< 32 ->
    parse(skip_ws(T), States);
parse(_Unexpected, _States) ->
    %% Any other character cannot start a construct handled here. Retrying
    %% skip_ws/1 on it would never consume input, so fail instead of looping.
    ?INVALID.

%% Continues after the '}' of an object.
%% A following ',' either separates two objects of an array, where it is
%% dropped so that no extra endElement is emitted, or separates this object
%% value from the next member of the enclosing object, where the ',' clause
%% of parse/2 must run to close the enclosing member element.
after_object(Chars, States) ->
    case skip_ws(Chars) of
        [$, | T] ->
            case skip_ws(T) of
                [Q | _] = Next when Q == $"; Q == $' ->
                    parse([$, | Next], States);
                Next ->
                    parse(Next, States)
            end;
        Rest ->
            parse(Rest, States)
    end.

%% Reads a quoted member name and opens an element named after it.
%% Leading whitespace inside the quotes is dropped.
%% NOTE(review): list_to_atom/1 creates a new atom for every distinct key and
%% atoms are never garbage collected; do not feed untrusted input through
%% this parser without bounding the set of keys.
parse_member_name([Q | T], States) when Q == $"; Q == $' ->
    case parse_string(skip_ws(T), [], Q) of
        {Rest, Name} ->
            States1 = ?stateMachine({startElement, [], list_to_atom(Name), [], []}, States),
            parse(skip_ws(Rest), States1);
        error ->
            ?INVALID
    end;
parse_member_name(_Other, _States) ->
    ?INVALID.

%% Drops leading characters with code =< 32 (space and control characters).
skip_ws([H | T]) when H =< 32 ->
    skip_ws(T);
skip_ws(Chars) ->
    Chars.

%% parse_string(Chars, Acc, Quote) -> {Rest, String} | error
%%
%% Chars starts right after the opening quote; Rest starts right after the
%% closing one. Returns error when the input ends before the closing quote.
parse_string([Quote | T], Acc, Quote) ->
    {T, lists:reverse(Acc)};
parse_string([$\\, C | T], Acc, Quote) ->
    %% An escaped character never terminates the string.
    parse_string(T, unescape(C, Acc), Quote);
parse_string([H | T], Acc, Quote) ->
    parse_string(T, [H | Acc], Quote);
parse_string([], _Acc, _Quote) ->
    error.

%% Pushes the character denoted by the escape \C onto the reversed accumulator.
%% Escapes not listed here (for example \uXXXX) are kept verbatim.
unescape($n, Acc) -> [$\n | Acc];
unescape($t, Acc) -> [$\t | Acc];
unescape($r, Acc) -> [$\r | Acc];
unescape($b, Acc) -> [$\b | Acc];
unescape($f, Acc) -> [$\f | Acc];
unescape(C, Acc) when C == $"; C == $'; C == $\\; C == $/ -> [C | Acc];
unescape(C, Acc) -> [C, $\\ | Acc].

%% parse_number(Chars, Acc) -> {Rest, Literal} | error
%%
%% Collects characters up to, but not including, the next ',', '}' or ']'.
%% Whitespace between the literal and that delimiter is not part of the value.
%% Returns error when the input ends before a delimiter is found.
parse_number([H | _] = Rest, Acc) when H == $,; H == $}; H == $] ->
    %% Acc is reversed, so its leading whitespace is the literal's trailing one.
    {Rest, lists:reverse(skip_ws(Acc))};
parse_number([H | T], Acc) ->
    parse_number(T, [H | Acc]);
parse_number([], _Acc) ->
    error.
%% Demo driver: parses a sample document, exports the resulting tree with
%% xmerl's simple exporter and prints the XML text on the `user' device.
%% Fails with badmatch unless parse/1 returns {ok, Tree}.
test() ->
    %% Adjacent string literals are concatenated at compile time.
    Json =
        " {'businesses': [{'address1': '650 Mission Street', "
        "'address2': '', 'avg_rating': 4.5, "
        "'categories': [{'category_filter': 'localflavor', "
        "'name': 'Local Flavor', "
        "'search_url': 'http://lightpole.net/search'}], "
        "'city': 'San Francisco', 'distance': 0.085253790020942688, "
        "'id': '4kMBvIEWPxWkWKFN__8SxQ', "
        "'latitude': 37.787185668945298, 'longitude': -122.40093994140599}, "
        "{'address1': '25 Maiden Lane', "
        "'address2': '', 'avg_rating': 5.0, "
        "'categories': [{'category_filter': 'localflavor', "
        "'name': 'Local Flavor', "
        "'search_url': 'http://lightpole.net/search'}], "
        "'city': 'San Francisco', 'distance': 0.23186808824539185, "
        "'id': 'O1zPF_b7RyEY_NNsizX7Yw', "
        "'latitude': 37.788387, 'longitude': -122.40401}]} ",
    {ok, Tree} = parse(Json),
    Exported = xmerl:export_simple([Tree], xmerl_xml),
    Flat = lists:flatten(Exported),
    io:fwrite(user, "Parsed: ~n~p~n", [Flat]).
The result will be something like:
<?xml version=\"1.0\"?> <obj> <businesses> <obj> <address1>650 Mission Street</address1> <address2></address2> <avg_rating>4.5</avg_rating> <categories> <obj> <category_filter>localflavor</category_filter> <name>Local Flavor</name> <search_url>http://lightpole.net/search</search_url> </obj> </categories> <city>San Francisco</city> <distance>0.085253790020942688</distance> <id>4kMBvIEWPxWkWKFN__8SxQ</id> <latitude>37.787185668945298</latitude> <longitude>-122.40093994140599</longitude> </obj> <obj> <address1>25 Maiden Lane</address1> <address2></address2> <avg_rating>5.0</avg_rating> <categories> <obj> <category_filter>localflavor</category_filter> <name>Local Flavor</name> <search_url>http://lightpole.net/search</search_url> </obj> </categories> <city>San Francisco</city> <distance>0.23186808824539185</distance> <id>O1zPF_b7RyEY_NNsizX7Yw</id> <latitude>37.788387</latitude> <longitude>-122.40401</longitude> </obj> </businesses> </obj>
Now you can fetch an element with:
> [Latitude1] = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Xml), > Latitude1#xmlText.value. "37.787185668945298"
Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.
The code of xml_sm.erl can be found in my previous blog.