为了备份blog,简单写了一个适用于blogjava等metaWeblog的blog备份工具,功能:
(1)备份post的正文到本地
(2)备份正文中的图片、css文件到本地
(3)基于以上两个步骤,修改相关的链接,实现本地脱机浏览
想到了但是未实现的功能:
(1)评论无法保存
(2)合适的话可以考虑以Eclipse RCP形式包装
一、实现原理
(1)获取post的方法:使用MetaWeblog提供的API接口
metaWeblog.getRecentPosts (blogid, username, password, numberOfPosts) returns array of structs,
Each struct represents a recent weblog post, containing the same information that a call to metaWeblog.getPost would return.
If numberOfPosts is 1, you get the most recent post. If it's 2 you also get the second most recent post, as the second array element. If numberOfPosts is greater than the number of posts in the weblog you get all the posts in the weblog.
(2) 使用正则表达式分析获取下来的post,解析出post中包含的css和图片文件的地址,执行两步操作
- 根据地址,抓取图片保存到本地
- 修改post中的地址为本地保存地址
(3) 使用xml-rpc来简化远程调用过程的编程
二、主要的代码
public
ArrayList
<
SimplePost
>
getAllPosts(String blogID, String name,String password,
int
num)
throws
XmlRpcException {
ArrayList < SimplePost > posts = new ArrayList < SimplePost > ();
Object[] params = new Object[] { blogID, name, password, new Integer(num) };
Object[] result = (Object[]) client.execute( " metaWeblog.getRecentPosts " , params);
for ( int i = 0 ; i < result.length; i ++ ) {
Map map = (Map) result[i];
String postUrl = (String) map.get( " link " );
String title = (String) map.get( " title " );
String postId = (String) map.get( " postid " );
// post的内容
String description = (String) map.get( " description " );
Map < String, String > images = new HashMap < String, String > ();
images = getImagesURL(description);
String newDes = handleImagesURL(description,postId);
String descriptioFileName = savePostContent(savePath, title,postId, newDes, css);
SimplePost post = new SimplePost(postUrl, title, postId,descriptioFileName);
// 从postContent获取图像的地址和名称,以便获取图片并保存
post.setImages(images);
posts.add(post);
log.debug( " postID: " + postId + " postTitle : " + title);
}
return posts;
}
ArrayList < SimplePost > posts = new ArrayList < SimplePost > ();
Object[] params = new Object[] { blogID, name, password, new Integer(num) };
Object[] result = (Object[]) client.execute( " metaWeblog.getRecentPosts " , params);
for ( int i = 0 ; i < result.length; i ++ ) {
Map map = (Map) result[i];
String postUrl = (String) map.get( " link " );
String title = (String) map.get( " title " );
String postId = (String) map.get( " postid " );
// post的内容
String description = (String) map.get( " description " );
Map < String, String > images = new HashMap < String, String > ();
images = getImagesURL(description);
String newDes = handleImagesURL(description,postId);
String descriptioFileName = savePostContent(savePath, title,postId, newDes, css);
SimplePost post = new SimplePost(postUrl, title, postId,descriptioFileName);
// 从postContent获取图像的地址和名称,以便获取图片并保存
post.setImages(images);
posts.add(post);
log.debug( " postID: " + postId + " postTitle : " + title);
}
return posts;
}
public
static
Map
<
String, String
>
getImagesURL(String description) {
Map < String, String > map = new HashMap < String, String > ();
// img 的正则表达式
String imgPattern = " <\\s*img\\s+([^>]+)\\s*> " ;
Pattern p = Pattern.compile(imgPattern, Pattern.CASE_INSENSITIVE);
Matcher matcher = p.matcher(description);
// img src元素的正则表达式
String srcPattern = " \\s*src\\s*=\\s*\ " ([ ^ \ " ]+)\\s*\ "" ;
Pattern p2 = Pattern.compile(srcPattern, Pattern.CASE_INSENSITIVE);
while (matcher.find()) {
Matcher matcher2 = p2.matcher(matcher.group());
// 一定要find(),这是实际的匹配动作
if (matcher2.find()) {
String src = matcher2.group();
log.info(src);
int i2 = src.lastIndexOf( ' / ' );
int i1 = src.indexOf( " http " );
if (i1 != - 1 ) {
map.put(src.substring(i2 + 1 , src.length() - 1 ), src
.substring(i1, src.length() - 1 ));
}
}
}
log.debug( " 图片: " + map);
return map;
}
Map < String, String > map = new HashMap < String, String > ();
// img 的正则表达式
String imgPattern = " <\\s*img\\s+([^>]+)\\s*> " ;
Pattern p = Pattern.compile(imgPattern, Pattern.CASE_INSENSITIVE);
Matcher matcher = p.matcher(description);
// img src元素的正则表达式
String srcPattern = " \\s*src\\s*=\\s*\ " ([ ^ \ " ]+)\\s*\ "" ;
Pattern p2 = Pattern.compile(srcPattern, Pattern.CASE_INSENSITIVE);
while (matcher.find()) {
Matcher matcher2 = p2.matcher(matcher.group());
// 一定要find(),这是实际的匹配动作
if (matcher2.find()) {
String src = matcher2.group();
log.info(src);
int i2 = src.lastIndexOf( ' / ' );
int i1 = src.indexOf( " http " );
if (i1 != - 1 ) {
map.put(src.substring(i2 + 1 , src.length() - 1 ), src
.substring(i1, src.length() - 1 ));
}
}
}
log.debug( " 图片: " + map);
return map;
}
/**
* 替换description的图片链接为本地的相对链接,结构为blogFiles/images/postid/
*
* @param description
* @param userName
* @param postId
* @return
*/
public static String handleImagesURL(String description, String postId) {
String tmp = description;
String address = " images/ " + postId + " / " ;
String imgPattern = " <\\s*img\\s+([^>]+)\\s*> " ;
Pattern p = Pattern.compile(imgPattern, Pattern.CASE_INSENSITIVE);
Matcher matcher = p.matcher(tmp);
// img src元素的正则表达式
String srcPattern = " \\s*src\\s*=\\s*\ " ([ ^ \ " ]+)\\s*\ "" ;
// String srcPattern = "\\s*src\\s*=\\s*\'([^\']+)\\s*\'";
Pattern p2 = Pattern.compile(srcPattern, Pattern.CASE_INSENSITIVE);
while (matcher.find()) {
Matcher matcher2 = p2.matcher(matcher.group());
// 一定要find(),这是实际的匹配动作
if (matcher2.find()) {
String src = matcher2.group();
log.info(src);
int l2 = src.lastIndexOf( ' / ' ) + 1 ;
log.info(src.substring(l2,src.length() - 1 ));
tmp = tmp.replace(src, " src=\ "" +address+src.substring(l2,src.length()-1)+ " \ "" );
}
}
return tmp;
}
* 替换description的图片链接为本地的相对链接,结构为blogFiles/images/postid/
*
* @param description
* @param userName
* @param postId
* @return
*/
public static String handleImagesURL(String description, String postId) {
String tmp = description;
String address = " images/ " + postId + " / " ;
String imgPattern = " <\\s*img\\s+([^>]+)\\s*> " ;
Pattern p = Pattern.compile(imgPattern, Pattern.CASE_INSENSITIVE);
Matcher matcher = p.matcher(tmp);
// img src元素的正则表达式
String srcPattern = " \\s*src\\s*=\\s*\ " ([ ^ \ " ]+)\\s*\ "" ;
// String srcPattern = "\\s*src\\s*=\\s*\'([^\']+)\\s*\'";
Pattern p2 = Pattern.compile(srcPattern, Pattern.CASE_INSENSITIVE);
while (matcher.find()) {
Matcher matcher2 = p2.matcher(matcher.group());
// 一定要find(),这是实际的匹配动作
if (matcher2.find()) {
String src = matcher2.group();
log.info(src);
int l2 = src.lastIndexOf( ' / ' ) + 1 ;
log.info(src.substring(l2,src.length() - 1 ));
tmp = tmp.replace(src, " src=\ "" +address+src.substring(l2,src.length()-1)+ " \ "" );
}
}
return tmp;
}