下面是一个下载网页的程序,还很不完善,希望各位可以帮我完善一下!
例如:下载时设置编码;
异步下载;
显示下载进度等等。
我在这里先谢谢各位了!
using
System;
using
System.Collections.Generic;
using
System.Linq;
using
System.Text;
using
System.Net;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads a web page synchronously and saves the
    /// raw response bytes to a file next to the executable.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads http://www.sina.com.cn and writes the response body,
        /// byte for byte, to "abc.html" in the application's base directory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const int BufferSize = 1024;

            WebRequest request = WebRequest.Create("http://www.sina.com.cn");
            string targetPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "abc.html");

            // The response and its stream hold the underlying connection, so both
            // are disposed deterministically, even if reading or writing throws.
            using (WebResponse response = request.GetResponse())
            using (System.IO.Stream responseStream = response.GetResponseStream())
            using (System.IO.FileStream fileStream = new System.IO.FileStream(targetPath, System.IO.FileMode.Create))
            {
                byte[] buffer = new byte[BufferSize];
                int bytesRead;

                // Read returns 0 only at end of stream; a short read is not the end.
                while ((bytesRead = responseStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fileStream.Write(buffer, 0, bytesRead);
                }
                // No explicit Flush: disposing the FileStream flushes it.
            }
        }
    }
}
// Downloads the cnblogs home page with WebClient and prints the text to the console.
// Every disposable is wrapped in "using" so it is released even when the download throws.
using (WebClient client = new WebClient())
{
    // Original author's note: the following line does not seem to have any effect.
    client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

    using (System.IO.Stream data = client.OpenRead("http://www.cnblogs.com"))
    using (System.IO.StreamReader reader = new System.IO.StreamReader(data))
    {
        // Disposing the reader also closes the underlying stream, so the
        // reader is always released before (never after) the stream.
        string s = reader.ReadToEnd();
        Console.WriteLine(s);
    }
}
下面的代码是异步下载网页,并且可以设置编码。我试过了,下载博客园的首页用 UTF-8 没有问题;但是下载新浪和网易首页用 GB2312 时,用记事本打开没有问题,用 IE 和 Firefox 等浏览器打开却是乱码!我在浏览器中手动设置编码也不管用,不知道是为什么?
using
System;
using
System.Collections.Generic;
using
System.Linq;
using
System.Text;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads a web page asynchronously with
    /// <see cref="System.Net.WebClient"/> and saves it as text.
    /// </summary>
    public class Class1
    {
        /// <summary>
        /// Encoding used both to decode the downloaded page and to write the file.
        /// Using one encoding for both keeps the saved bytes consistent with the
        /// charset the page declares about itself.
        /// NOTE(review): if the reader encoding is changed (e.g. to GB2312) while the
        /// writer keeps its UTF-8 default, the file is transcoded and browsers that
        /// trust the page's own charset declaration will presumably show garbled text.
        /// </summary>
        private static readonly Encoding PageEncoding = new UTF8Encoding(false);

        /// <summary>
        /// Starts the asynchronous download of http://www.cnblogs.com and waits
        /// for a key press so the process stays alive until the callback has run.
        /// </summary>
        public static void Main()
        {
            using (System.Net.WebClient wc = new System.Net.WebClient())
            {
                // Attach the handler BEFORE starting the operation so the
                // completion event can never fire without a subscriber.
                wc.OpenReadCompleted += new System.Net.OpenReadCompletedEventHandler(wc_OpenReadCompleted);
                wc.OpenReadAsync(new Uri("http://www.cnblogs.com"));

                Console.WriteLine("请稍等。。");
                Console.Read();
            }
        }

        /// <summary>
        /// Completion callback: writes the downloaded page to "aa.html" in the
        /// application's base directory, or reports why the download did not finish.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">Carries the result stream, or the error / cancellation state.</param>
        static void wc_OpenReadCompleted(object sender, System.Net.OpenReadCompletedEventArgs e)
        {
            // Accessing e.Result throws when the operation failed or was cancelled,
            // so both states are checked first.
            if (e.Cancelled)
            {
                Console.WriteLine("下载已取消。");
                return;
            }

            if (e.Error != null)
            {
                Console.WriteLine("下载失败:" + e.Error.Message);
                return;
            }

            string targetPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "aa.html");

            // Disposing the reader also closes e.Result; disposing the writer flushes it.
            using (System.IO.StreamReader reader = new System.IO.StreamReader(e.Result, PageEncoding))
            using (System.IO.StreamWriter writer = new System.IO.StreamWriter(targetPath, false, PageEncoding))
            {
                writer.Write(reader.ReadToEnd());
            }

            Console.WriteLine("你好,你的数据已经下载完成!");
        }
    }
}