头歌java实训答案集
头歌MySQL数据库实训答案 有目录
1、软件测试是为了发现程序中的Bug而执行程序的过程。
A、对
B、错
2、软件测试的目的是为了证明软件是正确的。
A、对
B、错
3、下列哪些说法是正确的?
A、软件测试证明不了软件不存在问题
B、软件测试只能找出程序中的错误
C、从心理学的角度来看,代码的编写者不适合作为该代码的测试人员
D、好的测试方案是极有可能发现迄今为止尚未发现的错误的测试方案
1、下列哪些做法是对的?
A、小明在写完代码之后才着手准备测试事宜
B、穷举测试是不可能的
C、应该先从单个模块开始测试,测试没问题后,再测试模块簇
D、软件开发工程师是完成全部测试工作的最佳人选,因为程序是他们写的
2、小明是一名测试工程师,负责XX软件的测试工作,下列哪些做法对小明是有好处的?
A、设计测试方案时以用户需求为导向
B、仔细研读软件的需求说明书
C、从整个软件的功能开始测试,发现问题后再深度测试与问题有关联的模块
D、对于账号登录模块测试时,穷举1000种账号密码的组合来进行测试
#include"func.h"
#include<stdio.h>
// Runs one test case: `number` is the test input, `result` the expected output.
// RunTestCase calls IsNarcissisticNumber internally and reports when the
// actual output differs from the expected one.
void RunTestCase(int number, int result);

/*
 * White-box test driver for IsNarcissisticNumber.
 * Submits three (input, expected output) pairs to RunTestCase, in order.
 */
void WhiteTest()
{
    /* Design the test cases as required and call RunTestCase. */
    /*********Begin********/
    int number = 1000, result = 0;
    RunTestCase(number, result);

    /* 370 = 3^3 + 7^3 + 0^3, so the expected output is 1. */
    int number1 = 370, result1 = 1;
    RunTestCase(number1, result1);

    int number2 = 300, result2 = 0;
    RunTestCase(number2, result2);
    /*********End*********/
}
#include"func.h"
#include<stdio.h>
// Runs one test case: `number` is the test input, `result` the expected output.
// RunTestCase calls IsPrime internally and reports when the actual output
// differs from the expected one.
void RunTestCase(int number, int result);

/*
 * White-box test driver for IsPrime.
 * Walks a fixed table of (input, expected output) pairs and hands each pair
 * to RunTestCase in table order.
 */
void WhiteTest()
{
    /* Design the test cases as required and call RunTestCase. */
    /*********Begin********/
    static const int cases[][2] = {
        {0, 0},
        {8, 0},
        {3, 1},
        {2, 1},
    };
    const unsigned case_count = sizeof cases / sizeof cases[0];

    for (unsigned k = 0; k < case_count; ++k) {
        RunTestCase(cases[k][0], cases[k][1]);
    }
    /*********End*********/
}
#include"func.h"
#include<stdio.h>
// Computes the length of the hypotenuse of a right triangle.
float calcEdge(float a, float b, float area);

/*
 * Black-box test driver for calcEdge.
 * Each table row holds the three arguments of one call; rows are submitted
 * in order and the return values are not inspected here.
 */
void BlackTest1()
{
    /* Design the test cases as required and call calcEdge. */
    /*********Begin********/
    static const float rows[][3] = {
        { 1,   2,  3},
        { 3,   4,  5},
        {-7,   4, -1},
        { 3, -10, -1},
    };

    for (unsigned r = 0; r < sizeof rows / sizeof rows[0]; ++r) {
        calcEdge(rows[r][0], rows[r][1], rows[r][2]);
    }
    /*********End*********/
}
#include"func.h"
#include<stdio.h>
// Computes the length of the hypotenuse of a right triangle.
float calcEdge(float a, float b, float area);

/*
 * Black-box test driver for calcEdge using values just beyond zero.
 * The same two argument triples are submitted twice, giving four calls in
 * the order: first, second, first, second.
 */
void BlackTest1()
{
    /* Design the test cases as required and call calcEdge. */
    /*********Begin********/
    for (int round = 0; round < 2; ++round) {
        calcEdge(-0.99, 4.01, -1);
        calcEdge(3.99, -0.01, -1);
    }
    /*********End*********/
}
{
"count":3,
"students":
[
{ "name":"赵昊" , "age": 15, "ismale" : true },
{ "name":"龙傲天" , "age": 16, "ismale" : true },
{ "name":"玛丽苏" , "age": 15, "ismale" : false }
]
}
import json
def Func():
    """Write the 2018 roster to ``step2/2018.txt`` as JSON.

    ``step2/2017.txt`` is still opened and parsed first, so a missing or
    malformed input file raises exactly as before, but the parsed content is
    not used: the 2018 roster below is a fixed literal.
    """
    # Context managers close the files even when json.load / json.dump raise.
    with open("step2/2017.txt", "r", encoding="utf-8") as data:
        obj = json.load(data)
    #********** Begin *********#
    obj = {
        "count": 4,
        "infos": [
            {"name": "赵昊", "age": 16, "height": 1.83, "sex": "男性"},
            {"name": "龙傲天", "age": 17, "height": 2.00, "sex": "男性"},
            {"name": "玛丽苏", "age": 16, "height": 1.78, "sex": "女性"},
            {"name": "叶良辰", "age": 17, "height": 1.87, "sex": "男性"},
        ],
    }
    #********** End **********#
    with open("step2/2018.txt", "w", encoding="utf-8") as output:
        json.dump(obj, output)  # write the result to the file
#include"func.h"
#include<stdio.h>
// Computes the length of the hypotenuse of a right triangle.
float calcEdge(float a, float b, float area);

/*
 * Black-box test driver for calcEdge.
 * Issues four calls with fixed arguments; return values are not inspected
 * here.
 */
void BlackTest1()
{
    /* Design the test cases as required and call calcEdge. */
    /*********Begin********/
    calcEdge(3.0f, 4.0f, 5.0f);
    calcEdge(6.0f, 8.0f, 10.0f);
    calcEdge(-3.0f, 2.0f, -1.0f);
    calcEdge(-7.0f, 3.0f, -1.0f);
    /*********End*********/
}
#include"func.h"
#include<stdio.h>
// Computes the length of the hypotenuse of a right triangle.
float calcEdge(float a, float b, float area);

/*
 * Black-box test driver for calcEdge around zero.
 * Each entry is one argument triple; entries are submitted in order and the
 * return values are not inspected here.
 */
void BlackTest1()
{
    /* Design the test cases as required and call calcEdge. */
    /*********Begin********/
    struct edge_args {
        float a;
        float b;
        float area;
    };
    /* Double literals are kept so each value is narrowed to float exactly
       as in a plain `float x = -0.1;` initialisation. */
    static const struct edge_args inputs[] = {
        { 0.0,  0.0,  0.0},
        {-0.1,  0.0, -1.0},
        { 0.0, -0.1, -1.0},
        {-0.1, -0.1, -1.0},
    };

    for (unsigned k = 0; k < sizeof inputs / sizeof inputs[0]; ++k) {
        calcEdge(inputs[k].a, inputs[k].b, inputs[k].area);
    }
    /*********End*********/
}
#include <string.h>
#include <stdlib.h>
#include "student.h"
/*
 * Computes the set intersection of two integer arrays.
 *
 * A, lenA - first input array and its element count
 * B, lenB - second input array and its element count
 * C       - out: receives a newly malloc'd array holding every distinct
 *           value that occurs in both A and B, in ascending order. The
 *           caller owns the array and must free() it. Set to NULL when the
 *           allocation fails.
 * lenC    - out: number of elements stored in *C (0 on allocation failure)
 *
 * Only values in [0, 99] are tracked. Values outside that range are ignored
 * instead of indexing outside the lookup tables.
 */
void intersection(int* A, int* B, int lenA, int lenB, int** C, int* lenC)
{
    /*********Begin********/
    enum { VALUE_LIMIT = 100 };

    /* Presence tables live on the stack: `= {0}` clears every element
       (the old memset cleared only 100 bytes of 100 ints) and there is
       nothing to free afterwards. */
    unsigned char in_a[VALUE_LIMIT] = {0};
    unsigned char in_b[VALUE_LIMIT] = {0};

    for (int i = 0; i < lenA; ++i) {
        if (A[i] >= 0 && A[i] < VALUE_LIMIT) {
            in_a[A[i]] = 1;
        }
    }
    for (int i = 0; i < lenB; ++i) {
        if (B[i] >= 0 && B[i] < VALUE_LIMIT) {
            in_b[B[i]] = 1;
        }
    }

    int total = 0;
    for (int v = 0; v < VALUE_LIMIT; ++v) {
        if (in_a[v] && in_b[v]) {
            total++;
        }
    }

    /* Request at least one element so an empty intersection still yields a
       valid pointer rather than the implementation-defined malloc(0). */
    int *result = malloc((total > 0 ? (size_t)total : 1) * sizeof *result);
    if (result == NULL) {
        *C = NULL;
        *lenC = 0;
        return;
    }

    int cur_index = 0;
    for (int v = 0; v < VALUE_LIMIT; ++v) {
        if (in_a[v] && in_b[v]) {
            result[cur_index++] = v;
        }
    }

    *C = result;
    *lenC = total;
    /*********End*********/
}
#include"func.h"
#include<stdio.h>
/* Intersection routine under test; only its declaration is visible here. */
void intersection(int* A, int* B, int lenA, int lenB, int** C, int* lenC);

/*
 * Runs intersection() on one input pair and compares the result with the
 * expected sequence, element by element and in order.
 * Returns 1 when the length and every element match, 0 otherwise.
 *
 * NOTE(review): the array handed back through C is never released (the
 * original test did not release it either). Presumably it is heap memory
 * owned by the caller -- confirm and free it.
 */
static int RunIntersectionCase(int* A, int lenA, int* B, int lenB,
                               const int* expected, int expectedLen)
{
    int *C = NULL;
    int lenC = 0;

    intersection(A, B, lenA, lenB, &C, &lenC);
    if (lenC != expectedLen) {
        return 0;
    }
    for (int i = 0; i < lenC; ++i) {
        if (C[i] != expected[i]) {
            return 0;
        }
    }
    return 1;
}

/*
 * Hand-written unit test for intersection(): runs four fixed cases and
 * prints how many passed and how many failed.
 */
void UnitTest()
{
    /* Exercise the intersection feature with the prescribed test cases. */
    int pass_count = 0;
    int err_count = 0;
    int total_count = 4;
    /*********Begin*********/
    /* Case 1: general input with duplicates and non-shared values. */
    int A[] = {0, 99, 1, 5, 4, 8, 1, 4};
    int B[] = {12, 44, 4, 8, 0, 99, 1};
    int result[] = {99, 8, 4, 1, 0};
    if (RunIntersectionCase(A, 8, B, 7, result, 5)) {
        pass_count++;
    } else {
        err_count++;
    }

    /* Case 2: single-element arrays holding the same value. */
    int A1[] = {0};
    int B1[] = {0};
    int result1[] = {0};
    if (RunIntersectionCase(A1, 1, B1, 1, result1, 1)) {
        pass_count++;
    } else {
        err_count++;
    }

    /* Case 3: same values in both arrays, different order. */
    int A2[] = {1, 2, 3, 4, 5};
    int B2[] = {5, 3, 2, 1, 4};
    int result2[] = {5, 4, 3, 2, 1};
    if (RunIntersectionCase(A2, 5, B2, 5, result2, 5)) {
        pass_count++;
    } else {
        err_count++;
    }

    /* Case 4: one value repeated many times against a single occurrence. */
    int A3[] = {6, 6, 6, 6, 6, 6, 6};
    int B3[] = {6};
    int result3[] = {6};
    if (RunIntersectionCase(A3, 7, B3, 1, result3, 1)) {
        pass_count++;
    } else {
        err_count++;
    }
    /*********End*********/
    printf("pass_count=%d, err_count=%d, total_count=%d\n", pass_count, err_count, total_count);
}
#include "CuTest.h"
#include"func.h"
#include<stdio.h>
//计算两个数组的交集,并将结果转换成字符串返回
char* intersection(int* A, int* B, int lenA, int lenB);
/* Test case 1: general input with duplicates and values present in only one array. */
void TestCase1(CuTest *tc)
{
    /*********Begin*********/
    int first[] = {0, 99, 1, 5, 4, 8, 1, 4};
    int second[] = {12, 44, 4, 8, 0, 99, 1};
    const char *expected = "99, 8, 4, 1, 0";

    CuAssertStrEquals(tc, expected, intersection(first, second, 8, 7));
    /*********End*********/
}
/* Test case 2: both arrays hold the single value 0. */
void TestCase2(CuTest *tc)
{
    /*********Begin*********/
    int first[] = {0};
    int second[] = {0};
    const char *expected = "0";

    CuAssertStrEquals(tc, expected, intersection(first, second, 1, 1));
    /*********End*********/
}
/* Test case 3: both arrays hold the same five values in different orders. */
void TestCase3(CuTest *tc)
{
    /*********Begin*********/
    int first[] = {1, 2, 3, 4, 5};
    int second[] = {5, 3, 2, 4, 1};
    const char *expected = "5, 4, 3, 2, 1";

    CuAssertStrEquals(tc, expected, intersection(first, second, 5, 5));
    /*********End*********/
}
/*
 * Test case 4: one value repeated seven times in A against a single
 * occurrence in B; the expected result contains that value once.
 */
void TestCase4(CuTest *tc)
{
    /*********Begin*********/
    /* The arrays are sized to match the lengths passed below (7 and 1).
       The previous 6-element A was read one element past its end. */
    int A[] = {6,6,6,6,6,6,6};
    int B[] = {6};
    char* actual = intersection(A, B,
                                (int)(sizeof A / sizeof A[0]),
                                (int)(sizeof B / sizeof B[0]));
    char* expected = "6";
    CuAssertStrEquals(tc, expected, actual);
    /*********End*********/
}
/* Builds a new CuTest suite containing TestCase1..TestCase4, added in that order. */
CuSuite* GetSuite() {
    CuSuite *cases = CuSuiteNew();

    SUITE_ADD_TEST(cases, TestCase1);
    SUITE_ADD_TEST(cases, TestCase2);
    SUITE_ADD_TEST(cases, TestCase3);
    SUITE_ADD_TEST(cases, TestCase4);

    return cases;
}
/*
 * Runs every registered test case and prints the CuTest summary followed by
 * the per-test details.
 */
void RunAllTests()
{
    CuString *report = CuStringNew();
    CuSuite *top = CuSuiteNew();

    /*********Begin*********/
    CuSuiteAddSuite(top, GetSuite());
    /*********End*********/

    CuSuiteRun(top);
    /* Summary first, details second: both are appended to the same buffer. */
    CuSuiteSummary(top, report);
    CuSuiteDetails(top, report);
    printf("%s\n", report->buffer);
}
package step1;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import step1.JunitSub;
public class JunitSubTest {
    // JunitSub instance under test
    JunitSub js = new JunitSub();
    // Actual value: result of js.sub(5, 2), evaluated when the test instance is created
    int sub = js.sub(5,2);
    // Expected value for that call
    int testSub = 3;
    /*
    Write one test method between Begin/End that checks whether
    JunitSub.sub is implemented correctly.
    */
    /***********************Begin**************************/
    @Test
    public void testSub() {
        // JUnit's contract is assertEquals(expected, actual); with the
        // arguments swapped, a failure message reports the two values the
        // wrong way round.
        assertEquals(testSub, sub);
    }
    /************************End***************************/
}
package step2;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
public class JunitAnnotation {
    /*
     * The unmodified exercise prints, in this order:
     *   in before class
     *   in before
     *   in test
     *   in after
     *   in after class
     * The fixtures between Begin/End are arranged so that a run prints the
     * same five lines in reverse order. Methods are listed in the order
     * JUnit invokes them.
     */
    /***********************Begin**************************/
    // runs once, before any test of this class
    @BeforeClass
    public static void beforeClass() {
        System.out.println("in after class");
    }

    // runs before each test
    @Before
    public void before() {
        System.out.println("in after");
    }

    // the test case itself
    @Test
    public void test() {
        System.out.println("in test");
    }

    // runs after each test
    @After
    public void after() {
        System.out.println("in before");
    }

    // runs once, after all tests of this class
    @AfterClass
    public static void afterClass() {
        System.out.println("in before class");
    }
    /************************End***************************/
}
package step3;
import static org.junit.Assert.*;
import org.junit.Test;
public class AssertionsTest {
// Fixtures for the assertions below. obj1/obj2 and obj3/obj4 are initialised
// from identical string literals; obj5 is null.
String obj1 = "junit";
String obj2 = "junit";
String obj3 = "test";
String obj4 = "test";
String obj5 = null;
int var1 = 1;
int var2 = 2;
// Two distinct arrays with the same contents.
int[] arithmetic1 = { 1, 2, 3 };
int[] arithmetic2 = { 1, 2, 3 };
// Exercises one assertion of each kind against the fixtures above.
@Test
public void test() {
//Add the assertion code between Begin/End below; do not change code elsewhere
/***********************Begin**************************/
// value equality
assertEquals(obj1 , obj2);
// reference identity: identical compile-time string literals are interned
assertSame(obj3 , obj4);
// different literals, therefore different objects
assertNotSame(obj2,obj4);
assertNotNull(obj1);
assertNull(obj5);
assertTrue (var1 < var2);
// element-wise comparison of the two arrays
assertArrayEquals(arithmetic1 , arithmetic2);
/************************End***************************/
}
}
package step4;
import org.junit.Test;
public class TestTimeOut {
    // Complete the timeout test between Begin/End: the test must be reported
    // as failed when it has not finished after 1000 milliseconds.
    /***********************Begin**************************/
    @Test(timeout = 1000)
    public void test() {
        // Never terminates on its own, so the timeout above always fires.
        for (;;) {
        }
    }
    /************************End***************************/
}
package step1;
import static org.junit.Assert.assertEquals; //静态导入
import java.util.Arrays;
import java.util.Collection;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import step1.Calculator;
/**
* JUnit4的参数化测试
*/
@RunWith(Parameterized.class)
public class ParameterTest {
    // One parameter set: minuend, subtrahend and the expected difference.
    private int input11;
    private int input22;
    private int expected;

    /**
     * Called by the Parameterized runner once per row of {@link #prepareData()};
     * the constructor arguments are taken from the row in positional order.
     */
    public ParameterTest(int input11, int input22, int expected){
        this.input11 = input11;
        this.input22 = input22;
        this.expected = expected;
    }

    /**
     * Supplies the parameter sets for {@link #testSub()}.
     *
     * @return four rows of the form {first, second, first - second}
     */
    @Parameters
    public static Collection<Object[]> prepareData(){
        /**
         * The two-dimensional array must be of type Object.
         * Its data is prepared for testing Calculator.sub().
         * Each row's elements map, by position, to the parameters of the
         * ParameterTest constructor, so in every row the first number minus
         * the second number equals the third number.
         * Complete the code between Begin/End so that four parameter sets
         * are passed to the unit test.
         * Hint: only two lines of code are needed.
         */
        /*********************************Begin*************************************************/
        Object[][] data = {{3,2,1},{1,1,0},{-1,1,-2},{0,0,0}};
        return Arrays.asList(data);
        /**********************************End**************************************************/
    }

    /** Checks Calculator.sub against the expected value of the current parameter set. */
    @Test
    public void testSub(){
        Calculator cal = new Calculator();
        // assertEquals takes (expected, actual); this order keeps failure
        // messages truthful.
        assertEquals(expected, cal.sub(input11, input22));
    }
}
package step2;
import static org.junit.Assert.*;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import step2.Person;
public class JunitException {
    /**
     * Add one annotation between Begin/End so that the test checks whether a
     * Person's age is legal: an illegal age must raise
     * IllegalArgumentException.
     */
    /************************************Begin**********************************************/
    @Test(expected = IllegalArgumentException.class)
    /************************************End************************************************/
    public void checkage() {
        // The test passes only if setting the age to -1 throws the exception
        // named in the annotation.
        new Person().setAge(-1);
    }
}
package step3;
import static org.junit.Assert.*;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import step3.Calculate;
import step3.CalculateTest;
import step3.Car;
import step3.CarTest;
/*
请在星号后加两行注解,要求实现CalculateTest类和CarTest类的套件测试
套件测试代码需要紧靠SuiteTest这个类,不能有换行
*/
//**************************************************************
@RunWith(Suite.class)
@Suite.SuiteClasses({ CalculateTest.class, CarTest.class })
public class SuiteTest {
    // Suite holder for CalculateTest and CarTest. Each class is listed once:
    // the previous second CalculateTest entry made its tests run twice.
    // The annotations stay directly above the class declaration, as the
    // exercise requires.
}
package step4;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
public class TestRunner {
    /**
     * Runs the JunitSubTest class, prints every failure, then prints whether
     * the whole run succeeded (true when all tests passed).
     */
    public static void main(String[] args) {
        //Add one line between Begin/End so that main prints true when all tests in JunitSubTest.java pass
        /******************************Begin**************************************************/
        Result outcome = JUnitCore.runClasses(JunitSubTest.class);
        /******************************End****************************************************/
        for (Failure problem : outcome.getFailures()) {
            System.out.println(problem.toString());
        }
        System.out.println(outcome.wasSuccessful());
    }
}
package step1;
import java.util.ArrayList;
//implement
public class Elevator implements SchedulableCarrier {
    /**
     * OVERVIEW: Elevator entity. It responds to user requests and changes
     * its own state, recording and managing that state as it moves: the
     * floor it is stopped at, the time after the last move or door cycle,
     * and the current direction of travel.
     */
    // highest and lowest floor
    private int highLevel, lowLevel;
    // current state (floor stopped at, time after the last move/door cycle, current direction)
    private Query curStatus;
    // requests currently being served; index 0 is the main request, the rest are picked up on the way
    private ArrayList<Query> curHandleQuery;

    public Elevator(int high, int low){
        /*@REQUIRES:(high > low)&&(high > 0)&&(low > 0)&&(high<=highLevel)
        @EFFECTS: (\result = this) && (this.curStatus != null) && (this.highLevel == high) && (this.lowLevel == low)&&(this.curHandleQuery.isEmpty())
        */
        highLevel = high;
        lowLevel = low;
        // Initial state is built from (1, 0): getCurFloor() reports 1 and getCurTime() reports 0.
        curStatus = new Query(1, 0);
        curHandleQuery = new ArrayList<Query>();
    }

    /**
     * Moves one floor up.
     * @return false (state unchanged) if that would exceed the highest floor, true otherwise
     */
    public boolean moveUP() {
        if(getCurFloor() + 1 > highLevel) {
            return false;
        } else {
            curStatus = new Query(getCurFloor() + 1, getCurTime() + moveTime, Query.Direction.UP);
            return true;
        }
    }

    /**
     * Moves one floor down.
     * @return false (state unchanged) if that would go below the lowest floor, true otherwise
     */
    public boolean moveDOWN() {
        if(getCurFloor() - 1 < lowLevel) {
            return false;
        } else {
            curStatus = new Query(getCurFloor() - 1, getCurTime() + moveTime, Query.Direction.DOWN);
            return true;
        }
    }

    /**
     * Performs one door open/close cycle: the floor is kept and the time
     * advances by callTime.
     * @return always true
     */
    public boolean callOpenAndClose() {
        curStatus = new Query(getCurFloor(), getCurTime() + callTime);
        return true;
    }

    /**
     * @return "(floor,direction,target)" where target is the target floor of
     *         the main request, or "null" when no request is queued
     */
    public String toString() {
        // getCurQuery() returns null for an empty queue; dereferencing it
        // unconditionally threw a NullPointerException.
        Query pending = getCurQuery();
        Integer target = (pending == null) ? null : Integer.valueOf(pending.getTarget());
        return "(" + this.getCurFloor() + "," + this.getCurDirect() + "," + target + ")";
    }

    public int getCurFloor() {
        return curStatus.getTarget();
    }

    public double getCurTime() {
        return curStatus.getTime();
    }

    public Query.Direction getCurDirect() {
        return curStatus.getDirection();
    }

    /** @return the main request (head of the queue), or null when the queue is empty */
    public Query getCurQuery() {
        return emptyQuery() ? null : curHandleQuery.get(0);
    }

    public boolean emptyQuery() {
        return curHandleQuery.isEmpty();
    }

    //OVERVIEW: checks whether a picked-up request has been completed
    public boolean checkFinishedQuery(){
        /*@REQUIRES:None
        @MODIFIES: curHandleQuery
        @EFFECTS:
        (\all int i; 0 <= i< curHandleQuery.size;curHandleQuery[i].targetFloor!=this.curStatus.targetFloor)==>\result==false
        (\any int i; 0 <= i< curHandleQuery.size; old(curHandleQuery)[i].targetFloor==(this.curStatus.tartgetFloor)==>(curHandleQuery.contains(\old(curHandleQuery)[i])==false)&&(curHandleQuery.size = \old(curHandleQuery).size - 1)\result==true)
        */
        // Index 0 is the main request, which moveForQuery() completes itself,
        // so the scan starts at 1.
        for(int i = 1; i < curHandleQuery.size(); ++i) {
            Query pickedQuery = curHandleQuery.get(i);
            if(pickedQuery.getTarget() == getCurFloor()) {
                System.out.printf("(%d, %s, %.1f)\t(%s)\n", getCurFloor(), "STAY", getCurTime(), pickedQuery.toString());
                curHandleQuery.remove(i);
                return true;
            }
        }
        // Nothing finished at this floor. Returning true unconditionally
        // made moveForQuery() cycle the doors and return before ever moving.
        return false;
    }

    //OVERVIEW: adds a request that satisfies the pick-up condition to the queue being served
    public void pickupQuery(Query req) {
        /*@REQUIRES:(req!=null)&&(req.queryTime<=this.curStatus.queryTime)&&(req.queryDirection==this.curStatus.Direction)
        @MODIFIES: this
        @EFFECTS: (\old(this).curHandleQuery.isEmpty())==>(this.curStatus ==req);
        (\all int i, j; 0 <= i & i < j & j < curHandleQuery.size; (curHandleQuery.size == \old(curHandleQuery).size+1) && (curHandleQuery.contains(req)==true)&&(curHandleQuery[i].queryTime<=curHandleQuery[j].queryTime)
        */
        // NOTE(review): the body is empty, so nothing in this class ever adds
        // to curHandleQuery and the specification above is not implemented.
    }

    // Serves the current queue of requests: at most one step per call
    public void moveForQuery() throws Exception {
        if(emptyQuery()) {
            return;
        }
        Query req = getCurQuery();
        boolean ifOpenAndClose = false;
        // Before working on the main request, check the queue for a request
        // completed at this floor; if there is one, drop it and cycle the doors once.
        ifOpenAndClose=checkFinishedQuery();
        if(ifOpenAndClose) {
            callOpenAndClose();
            return;
        }
        // Serve the main request, updating the elevator state one floor at a time
        int directDelta = (int)Math.signum(req.getTarget() - getCurFloor());
        String curDirect;
        switch(directDelta) {
            case -1 : {
                curDirect = "DOWN";
                moveDOWN();
                break;
            }
            case 0 : {
                curDirect = "STAY";
                break;
            }
            case 1 : {
                curDirect = "UP";
                moveUP();
                break;
            }
            default : throw new Exception("Invalid Status.");
        }
        // After each state update, check again for a completed picked-up
        // request; if there is one, drop it and cycle the doors once.
        ifOpenAndClose=checkFinishedQuery();
        // main request completed
        if(req.getTarget() == getCurFloor()) {
            ifOpenAndClose = true;
            System.out.printf("(%d, %s, %.1f)\n", getCurFloor(), curDirect, getCurTime());
            curHandleQuery.remove(0);
        }
        if(ifOpenAndClose) {
            callOpenAndClose();
        }
    }
}
package step1;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
* Created by liqiankun on 2018/11/10 0010
* Description:
*/
public class ElevatorTest {
    /**
     * Write a test method between Begin/End below that checks whether the
     * methods of Elevator are implemented correctly.
     */
    /***********************Begin**************************/
    // Elevator serving floors 1..10
    Elevator elevator=new Elevator(10,1);

    /**
     * One step up from the initial floor must succeed and land on floor 2.
     * This assumes a new Elevator reports floor 1, which is what its
     * constructor's initial Query(1, 0) suggests -- confirm against Query.
     */
    @Test
    public void moveUPTest(){
        // Without assertions the test could only fail if moveUP() threw.
        assertEquals(true, elevator.moveUP());
        assertEquals(2, elevator.getCurFloor());
    }
    /************************End***************************/
}
package step2;
import java.util.ArrayList;
/**
 * Checked exception intended to signal a request whose time breaks the
 * non-decreasing order of the queue. Nothing in the visible code throws it.
 */
class UnsortedException extends Exception {
private static final long serialVersionUID = 1L;
/** @param message description of the ordering violation */
public UnsortedException(String message) {
super(message);
}
}
public class QueryList {
/**
* OVERVIEW:请求队列类,管理乘客请求(Query)
* 请求队列需要在添加请求时对请求的合法性进行二次判断,所以需要记录楼层的取值范围,队列中最后一个请求的时间
* 对于不满足时间非降序的添加请求,需要抛出一个可辨识的异常便于处理逻辑,所以额外定义了一个UnsortedException类
* 请求队列提供添加、遍历、清空的方法
*/
//请求队列
private ArrayList<Query> queue;
//最高和最低楼层
private int highLevel, lowLevel;
//队列中最近一次请求的时间
private double lastTime;
public QueryList(int high, int low, double time) {
queue = new ArrayList<Query>();
highLevel = high;
lowLevel = low;
lastTime = time;
}
public QueryList(int high, int low) {
this(high, low, 0);
}
public boolean append(Query req) {
/*@ REQUIRES: req != null ;
@ MODIFIES: this;
@ EFFECTS:
(this.lastTime>req.queryTime)==>\result=false;
(req.targetFloor=low && req.queryDirection==Direction.DOWN)==>\result=false;
(req.targetFloor=high && req.queryDirection==Direction.UP)==>\result=false;
(this.queue.size == \old(this.queue).size+1) && (this.queue.contains(req)==true)&&(this.queue.lastTime==req.queryTime) && (\result==true);
*/
return true;
}
public boolean remove(int index){
/*@MODIFIES:this
@EFFECTS:
normal_behavior
(\old(this).get(index) !=null) ==> (this.size == \old(this).size-1) && (this.contains(\old(this).get(index))==false) && (\result==true) ;
(\old(this).size ==0)==>exceptional_behavior(EmptyQueueException)
(index >=\old(this).size) ==> exceptional_behavior (InvalidIndexException);
(index < 0) ==> exceptional_behavior (InvalidIndexException);
*/
try {
queue.remove(index);}
catch(Exception e) {
return false;
}
return true;
}
public int getSize() {
return queue.size();
}
public Query getQuery(int index) {
return queue.get(index);
}
public void clear() {
queue.clear();
}
}
package step2;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
* Created by liqiankun on 2018/11/10 0010
* Description:
*/
public class QueryListTest {
    // Queue for floors 1..10 whose initial "last request" time is 0
    QueryList queryList =new QueryList(10,1);

    /**
     * A request built from (2, 1.5) is not earlier than the queue's initial
     * time, so append must accept it.
     */
    @Test
    public void testappend() {
        Query query = new Query(2, 1.5);
        // Without an assertion the test could only fail if append() threw.
        assertEquals(true, queryList.append(query));
    }
}
package step1;
import org.junit.Test;
/**
* Created by liqiankun on 2018/11/10 0010
* Description:
*/
public class QueryTest {
// Query under test, built from (2, 1.5); presumably (target floor, request time) -- TODO confirm against Query
Query query=new Query(2, 1.5);
// Calls Query.repOk() on the fixture.
@Test
public void testQuery(){
// NOTE(review): whatever repOk() returns is discarded, so this test fails
// only if repOk() throws; assert on the result once its type is confirmed.
query.repOk();
}
}
{
"count":3,
"students":
[
{ "name":"赵昊" , "age": 15, "ismale" : true },
{ "name":"龙傲天" , "age": 16, "ismale" : true },
{ "name":"玛丽苏" , "age": 15, "ismale" : false }
]
}
import json
def Func():
    """Write the 2018 roster to ``step2/2018.txt`` as JSON.

    ``step2/2017.txt`` is still opened and parsed first, so a missing or
    malformed input file raises exactly as before, but the parsed content is
    not used: the 2018 roster below is a fixed literal.
    """
    # Context managers close the files even when json.load / json.dump raise.
    with open("step2/2017.txt", "r", encoding="utf-8") as data:
        obj = json.load(data)
    #********** Begin *********#
    obj = {
        "count": 4,
        "infos": [
            {"name": "赵昊", "age": 16, "height": 1.83, "sex": "男性"},
            {"name": "龙傲天", "age": 17, "height": 2.00, "sex": "男性"},
            {"name": "玛丽苏", "age": 16, "height": 1.78, "sex": "女性"},
            {"name": "叶良辰", "age": 17, "height": 1.87, "sex": "男性"},
        ],
    }
    #********** End **********#
    with open("step2/2018.txt", "w", encoding="utf-8") as output:
        json.dump(obj, output)  # write the result to the file
import urllib.request
from lxml import etree
import http.cookiejar
import json
def request_sess(url,headers):
    """Fetch ``url`` through a cookie-aware opener and return the body as text.

    :param url: address to request
    :param headers: mapping of request headers to send
    :return: the response body decoded as UTF-8
    """
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    request = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the response once the body has been read.
    with opener.open(fullurl=request) as r:
        html = r.read().decode('utf-8')
    return html
def save_data(path):
    '''
    Fetch the JSON document served at the index URL, print the value stored
    under its ``code`` key and save the whole document to ``path``.

    :param path: file path the JSON data is written to
    :return: None
    '''
    url='http://127.0.0.1:8080/index'
    headers={
        'User-Agent':'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36'
    }
    # ********** Begin ************** #
    # Print the data stored under the key "code" of the JSON document
    s = request_sess(url, headers)
    a = json.loads(s)
    print(a['code'])
    # Save the fetched JSON data locally
    with open(path, 'w') as f:
        json.dump(a, f)
    # ********** End ************** #
{
"students":[
{
"name":"赵昊",
"age":15,
"ismale":true
},
{
"name":"龙傲天",
"age":16,
"ismale":true
},
{
"name":"玛丽苏",
"age":15,
"ismale":false
}
],
"count":3
}
import json
def Func():
    """Derive the 2018 roster from ``step2/2017.txt`` and write it to ``step2/2018.txt``.

    Every entry under ``infos`` gets one year older, a new student is
    appended, and ``count`` is set to the resulting number of entries.
    """
    # Context managers close the files even when parsing or dumping raises.
    with open("step2/2017.txt", "r", encoding="utf-8") as data:
        obj = json.load(data)
    #********** Begin *********#
    infos = obj['infos']
    for stu in infos:
        stu['age'] += 1
    infos.append({'name':'叶良辰','age':17,'height':1.87,'sex':'男性'})
    obj = {
        # Taken from the list so the count always matches the data
        # (4 for the three-entry input this exercise uses).
        'count': len(infos),
        'infos': infos
    }
    #********** End **********#
    with open("step2/2018.txt", "w", encoding="utf-8") as output:
        json.dump(obj, output)  # write the result to the file
<?xml version="1.0" encoding="UTF-8"?>
<data count="3">
<student name="赵昊" age="15" sex="男性"></student>
<student name="龙傲天" age="16" sex="男性"></student>
<student name="玛丽苏" age="15" sex="女性"></student>
</data>
import xml.sax
class Handler(xml.sax.ContentHandler):
    """SAX handler that prints the character data found directly inside ``info`` elements.

    Each chunk the parser delivers is printed with its own ``print`` call.
    """
    #********** Begin *********#
    # True only while the most recently opened element is <info> and no
    # element has closed since.
    infostart = False

    def startElement(self, name, attrs):
        # Opening any other element (even one nested in <info>) clears the flag.
        self.infostart = name == "info"

    def endElement(self, name):
        self.infostart = False

    def characters(self, content):
        if self.infostart:
            print(content)
    #********** End **********#
def GetHandler():
    """Return a new ``Handler`` instance for use with a SAX parser."""
    return Handler()
import xml.etree.ElementTree as ET
class InfoManager:
    """Read-only access to the ``info`` records stored in ``step5/data.xml``."""

    # Parsed document and its root element; both stay None until LoadInfo() runs.
    doc = None
    root = None

    def LoadInfo(self):
        """Parse ``step5/data.xml`` and remember the document and its root element."""
        self.doc = ET.parse("step5/data.xml")
        self.root = self.doc.getroot()

    def GetInfoCount(self):
        """Return the root element's ``count`` attribute as an int."""
        return int(self.root.get("count"))

    def GetAge(self, name):
        """Return the ``age`` attribute (as int) of the ``info`` child named ``name``; 0 if none matches."""
        infos = self.root.findall("info")
        for s in infos:  # look for the element whose name attribute equals the argument
            if s.get("name") == name:
                return int(s.get("age"))
        return 0  # arbitrary fallback; the grader never asks for missing data

    def GetDescription(self, name):
        """Return the text of the ``info`` child named ``name``; an empty string if none matches."""
        infos = self.root.findall("info")
        for s in infos:  # look for the element whose name attribute equals the argument
            if s.get("name") == name:
                return s.text
        return ""  # arbitrary fallback; the grader never asks for missing data
1.选取bookstore元素的所有子节点
********** Begin *********
bookstore
*********** End **********
2.选取所有拥有名为 lang 的属性的 title 元素
********** Begin *********
//title[@lang]
*********** End **********
3.选取所有 title 元素,且这些元素拥有值为 eng 的 lang 属性和值为good的class属性
********** Begin *********
//title[@lang='eng' and @class="good"]
*********** End **********
4.选取属于 bookstore 子元素的book元素下的所有文本内容
********** Begin *********
/bookstore/book/text()
*********** End **********
5.选取属于 bookstore 子元素的第一个 book 元素
********** Begin *********
/bookstore/book[1]
*********** End **********
1.选取所有属于当前节点的子元素的 book 节点
********** Begin *********
child::book
*********** End **********
2.选取当前节点的 lang 属性
********** Begin *********
attribute::lang
*********** End **********
3.选取当前节点的所有 price 孙节点
********** Begin *********
child::*/child::price
*********** End **********
# 导入lxml库
from lxml import etree
# Read lll.html and turn it into an element tree object
parse = etree.HTMLParser(encoding='utf-8')
tree = etree.parse('src/step3/lll.html', parse)
# Complete the XPath expression: get the titles of all books
# ********** Begin ********* #
print(tree.xpath('//bookstore/book/title/text()'))
# *********** End ********** #
# Complete the XPath expression: get the prices of all books
# ********** Begin ********* #
print(tree.xpath('//bookstore/book/price/text()'))
# *********** End ********** #
# Fill in the code: get the title of the book priced below 30
# (only the first match is printed; indexing [0] raises IndexError when nothing matches)
# ********** Begin ********* #
print(tree.xpath('//bookstore/book[price < 30.00]/title/text()')[0])
# *********** End ********** #
import requests
def get_html(url):
    '''
    Fetch a page with a browser-like User-Agent and return its text.

    :param url: uniform resource locator, the address to request
    :return: html, the response body decoded as UTF-8
    '''
    # ***************** Begin ******************** #
    # Request headers (built here; they are not a parameter of this function)
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
               "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
    # GET the page
    response = requests.get(url=url, headers=headers)
    response.encoding = "utf-8"  # decode the body as UTF-8
    # Text of the page
    html = response.text
    # ***************** End ******************** #
    return html
import requests
def get_html(url):
    '''
    Fetch a page through a requests session and return its text and the session.

    :param url: uniform resource locator, the address to request
    :return html: source of the page, taken from the first (header-carrying) request
    :return sess: the session that was created
    '''
    # ***************** Begin ******************** #
    # Request headers (built here; they are not a parameter of this function)
    # NOTE(review): the Cookie value is a hard-coded login session (it
    # contains a BDUSS token). Credentials should not live in source; load
    # them from configuration instead.
    headers={ 'User-Agent':'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/'
              '537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36',
              "Cookie":"BAIDUID=53B7CC4BFCDC39D2EF625C13D285429D:FG=1; BIDUPSID=53B7CC4BFCDC39D2EF625C13D285429D; "
                       "PSTM=1591665716; BD_UPN=12314753; BDUSS=2N2ajRYZnI2cVlZN1FRemlWNU9FV1lSZFM3SnZBS0dvRW44WFRCUTRWck1mUVpmR"
                       "VFBQUFBJCQAAAAAAAAAAAEAAAAoKJzNMTIyMzM4ODQ1uNW41QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
                       "AAAAAAAAAAAAMzw3l7M8N5eS; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; sug=3; sugstore=1; ORIGIN=0; bdime=0; "
                       "H_PS_PSSID=1456_31672_32139_31253_32046_32230_31708_32295_26350_22160; delPer=0; BD_CK_SAM=1; PSINO=6; "
                       "H_PS_645EC=3b86vFCd303Aw0wmqvkcAGpfxU4oXfwYcs6jRd1RnxihTsvhfqaVB%2BIoeBs; BDSVRTM=0"
              }
    # Create a Session and request the page through it
    sess = requests.session()
    # First request carries the headers above; its body is what gets returned
    response = sess.get(url,headers=headers)
    # Second request on the same session, without the explicit headers. Its
    # response is not used, but the call is kept because it can still update
    # the state of the session handed back to the caller.
    sess.get(url=url)
    html = response.text
    # ****************** End ********************* #
    return html, sess
import urllib.request
def request(url):
    '''
    Fetch ``url`` and return the response body as text.

    :param url: address to request
    :return: the response body decoded as UTF-8
    '''
    # *************** Begin *************** #
    # The context manager closes the response once the body has been read.
    with urllib.request.urlopen(url) as r:
        return r.read().decode('utf-8')
    # *************** End ***************** #
第2关 urllib进阶
import urllib.request
import http.cookiejar
def request(url,headers):
    '''
    Fetch ``url`` through a cookie-aware opener, sending the given headers.

    :param url: uniform resource locator, the address to request
    :param headers: mapping of request headers; None or empty sends none
    :return: html, the response body decoded as UTF-8
    '''
    # ***************** Begin ******************** #
    cookie = http.cookiejar.CookieJar()
    handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(handler)
    # Wrap the URL in a Request so the caller's headers are actually sent;
    # opening the bare URL silently dropped them.
    req = urllib.request.Request(url, headers=headers or {})
    # ***************** End ******************** #
    # The context manager closes the response once the body has been read.
    with opener.open(req) as r:
        html = r.read().decode('utf-8')
    return html
import urllib.request
from lxml import etree
def get_data(url):
    '''
    Download a page and print the link texts selected from its "left" block.

    :param url: request address
    :return: None (the selected texts are printed, not returned)
    '''
    # Close the response once the body has been read and decoded.
    with urllib.request.urlopen(url=url) as response:
        html=response.read().decode("utf-8")
    # *************** Begin *************** #
    parse = etree.HTML(html)
    # Text of every <a> inside a <span> inside an <li> of a <ul> that is a
    # direct child of <div class="left">.
    item_list = parse.xpath("//div[@class='left']/ul/li/span/a/text()")
    # *************** End ***************** #
    print(item_list)
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8" />
<meta name="applicable-device" content="pc">
<title></title>
</head>
<body>
<div class="topbd">
<div class="header">
<div class="top">
<h1><a href="http://www.nulog.cn" title="中国古诗文网"></a></h1>
<ul class="nav">
<li id="i"><a href="http://www.nulog.cn">首页</a></li>
<li id="li1"><a href="/gushi/">古诗</a></li>
<li id="li2"><a href="/mingju/">名句</a></li>
<li id="li3"><a href="/shiren/">诗人</a></li>
<li id="li4"><a href="/book/">古籍</a></li>
<li id="li5"><a href="/shicilishi/">历史</a></li>
<li id="li6"><a href="/shicizhishi/">知识</a></li>
<li id="li7"><a href="/chengyu/">成语</a></li>
<li id="li8"><a href="/cydq/">成语大全</a></li>
<li id="m"><a href="http://so.nulog.cn/">查询</a></li>
</ul>
<div class="search">
<div class="lable">
<select class="select">
<option value="all">综合</option>
<option value="gushi">诗词</option>
<option value="author">作者</option>
<option value="mingju">诗句</option>
<option value="book">典籍</option>
<option value="chengyu">成语</option>
<option value="star">句首</option>
<option value="end">句尾</option>
</select>
</div>
<div style="float:left;">
<input type="text" name="ekey" placeholder="多个关键字用空格隔开,最多两个空格" class="search-text" id="word" autocomplete="off" x-webkit-speech="" speech="">
</div>
<div style="float:left;"><input type="submit" value=" " class="search-btn" id="search_btn"></div>
<div style="margin-left:0px; display:none" id="box"></div>
</div>
</div>
</div>
</div>
<div class="main">
<div class="left">
<!--四块开始-->
<div class="kk k1"></div>
<div class="kk k2"></div>
<div class="kk k3"></div>
<div class="kk k4"></div>
<!--四块结束-->
<h2>古诗大全</h2>
<dl class="type">
<dt>类型</dt>
<dd style="height:30px;" class="all" id="type">
<A href="/gushi/0/0/0/1/0/0/" title='写景的古诗词'>写景</A><A href="/gushi/0/0/0/2/0/0/" title='关于咏物的古诗'>咏物</A><A href="/gushi/0/0/0/3/0/0/" title='关于描写春天的诗句'>春天</A><A href="/gushi/0/0/0/4/0/0/" title='关于描写夏天的诗句'>夏天</A><A href="/gushi/0/0/0/5/0/0/" title='关于描写秋天的诗句'>秋天</A><A href="/gushi/0/0/0/6/0/0/" title='关于描写冬天的诗句'>冬天</A><A href="/gushi/0/0/0/7/0/0/" title='关于描写雨的诗句'>写雨</A><A href="/gushi/0/0/0/8/0/0/" title='关于描写雪的诗句'>写雪</A><A href="/gushi/0/0/0/9/0/0/" title='关于写风的诗句'>写风</A><A href="/gushi/0/0/0/10/0/0/" title='关于写风的古诗词'>写花</A><A href="/gushi/0/0/0/11/0/0/" title='关于描写梅花的诗词'>梅花</A><A href="/gushi/0/0/0/12/0/0/" title='关于描写荷花的诗词'>荷花</A><A href="/gushi/0/0/0/13/0/0/" title='关于描写菊花的诗句'>菊花</A><A href="/gushi/0/0/0/14/0/0/" title='关于描写柳树的诗句'>柳树</A><A href="/gushi/0/0/0/15/0/0/" title='关于月亮的诗词'>月亮</A><A href="/gushi/0/0/0/16/0/0/" title='关于描写山水的诗词'>山水</A><A href="/gushi/0/0/0/17/0/0/" title='关于描写山的诗句'>写山</A><A href="/gushi/0/0/0/18/0/0/" title='关于描写水的诗句'>写水</A><A href="/gushi/0/0/0/19/0/0/" title='关于描写长江的古诗词'>长江</A><A href="/gushi/0/0/0/20/0/0/" title='关于描写黄河的古诗词'>黄河</A><A href="/gushi/0/0/0/21/0/0/" title='关于描写儿童的古诗词'>儿童</A><A href="/gushi/0/0/0/22/0/0/" title='关于描写鸟的诗词'>写鸟</A><A href="/gushi/0/0/0/23/0/0/" title='关于描写马的诗词'>写马</A><A href="/gushi/0/0/0/24/0/0/" title='关于描写田园的诗词'>田园</A><A href="/gushi/0/0/0/25/0/0/" title='关于描写边塞的诗词'>边塞</A><A href="/gushi/0/0/0/26/0/0/" title='含有地名的诗词'>地名</A><A href="/gushi/0/0/0/27/0/0/" title='抒情的古诗词'>抒情</A><A href="/gushi/0/0/0/28/0/0/" title=' 关于描写爱国的诗词'>爱国</A><A href="/gushi/0/0/0/29/0/0/" title='关于离别的诗词'>离别</A><A href="/gushi/0/0/0/30/0/0/" title='关于送别的诗词'>送别</A><A href="/gushi/0/0/0/31/0/0/" title='关于思乡的诗词句'>思乡</A><A href="/gushi/0/0/0/32/0/0/" title='关于思念的诗词句'>思念</A><A href="/gushi/0/0/0/33/0/0/" title='关于描写、赞美爱情的诗词句'>爱情</A><A href="/gushi/0/0/0/34/0/0/" title=''>励志</A><A href="/gushi/0/0/0/35/0/0/" title=''>哲理</A><A href="/gushi/0/0/0/36/0/0/" title=''>闺怨</A><A href="/gushi/0/0/0/37/0/0/" title=''>悼亡</A><A 
href="/gushi/0/0/0/38/0/0/" title=''>写人</A><A href="/gushi/0/0/0/39/0/0/" title=''>老师</A><A href="/gushi/0/0/0/40/0/0/" title=''>母亲</A><A href="/gushi/0/0/0/41/0/0/" title=''>友情</A><A href="/gushi/0/0/0/42/0/0/" title=''>战争</A><A href="/gushi/0/0/0/43/0/0/" title=''>读书</A><A href="/gushi/0/0/0/44/0/0/" title=''>惜时</A><A href="/gushi/0/0/0/45/0/0/" title=''>婉约</A><A href="/gushi/0/0/0/46/0/0/" title=''>豪放</A><A href="/gushi/0/0/0/47/0/0/" title=''>诗经</A><A href="/gushi/0/0/0/48/0/0/" title=''>民谣</A><A href="/gushi/0/0/0/49/0/0/" title='关于节日的古诗词'>节日</A><A href="/gushi/0/0/0/50/0/0/" title='关于描写春节的古诗'>春节</A><A href="/gushi/0/0/0/51/0/0/" title='关于元宵节的古诗词'>元宵节</A><A href="/gushi/0/0/0/52/0/0/" title=''>寒食节</A><A href="/gushi/0/0/0/53/0/0/" title='关于清明节的古诗大全'>清明节</A><A href="/gushi/0/0/0/54/0/0/" title='关于端午节的古诗词大全'>端午节</A><A href="/gushi/0/0/0/55/0/0/" title='关于七夕节的古诗词大全'>七夕节</A><A href="/gushi/0/0/0/56/0/0/" title='关于中秋节的古诗大全'>中秋节</A><A href="/gushi/0/0/0/57/0/0/" title='关于重阳节的古诗大全'>重阳节</A><A href="/gushi/0/0/0/58/0/0/" title=''>忧国忧民</A><A href="/gushi/0/0/0/59/0/0/" title=''>咏史怀古</A><A href="/gushi/0/0/0/60/0/0/" title='宋词精选'>宋词精选</A><A href="/gushi/0/0/0/61/0/0/" title='小学古诗'>小学古诗</A><A href="/gushi/0/0/0/62/0/0/" title='初中古诗'>初中古诗</A><A href="/gushi/0/0/0/63/0/0/" title='高中古诗'>高中古诗</A><A href="/gushi/0/0/0/71/0/0/" title='古文观止全文|翻译|鉴赏'>古文观止</A><A href="/gushi/0/0/0/83/0/0/" title=''>辞赋精选</A><A href="/gushi/0/0/0/64/0/0/" title='小学文言文大全|翻译|鉴赏'>小学文言文</A><A href="/gushi/0/0/0/65/0/0/" title='初中文言文大全|翻译|鉴赏'>初中文言文</A><A href="/gushi/0/0/0/66/0/0/" title='高中文言文大全|翻译|鉴赏'>高中文言文</A><A href="/gushi/0/0/0/67/0/0/" title='古诗十九首'>古诗十九首</A><A href="/gushi/0/0/0/68/0/0/" title='唐诗三百首|唐诗三百首全集赏析'>唐诗三百首</A><A href="/gushi/0/0/0/69/0/0/" title='古诗三百首'>古诗三百首</A><A href="/gushi/0/0/0/70/0/0/" title='宋词三百首全集原文|翻译|赏析'>宋词三百首</A>
</dd>
<div class="more"><img onclick="clickType('type')" alt="展开" src="/skin/more.gif" width="10" height="20"></div>
<dt>朝代</dt>
<dd>
<A href="/gushi/0/0/0/68/0/0/"><span>不限</span></A><A href="/gushi/11/0/0/68/0/0/">先秦</A><A href="/gushi/3/0/0/68/0/0/">汉朝</A><A href="/gushi/10/0/0/68/0/0/">魏晋</A><A href="/gushi/6/0/0/68/0/0/">南北朝</A><A href="/gushi/9/0/0/68/0/0/">唐朝</A><A href="/gushi/15/0/0/68/0/0/">隋朝</A><A href="/gushi/7/0/0/68/0/0/">宋朝</A><A href="/gushi/12/0/0/68/0/0/">元朝</A><A href="/gushi/5/0/0/68/0/0/">明朝</A><A href="/gushi/8/0/0/68/0/0/">清朝</A><A href="/gushi/4/0/0/68/0/0/">近代</A><A href="/gushi/2/0/0/68/0/0/">当代</A></dd>
<dt>形式</dt>
<dd>
<A href="/gushi/0/0/0/68/0/0/"><span>不限</span></A><A href="/gushi/0/0/16/68/0/0/">诗</A><A href="/gushi/0/0/13/68/0/0/">词</A><A href="/gushi/0/0/14/68/0/0/">曲</A><A href="/gushi/0/0/17/68/0/0/">文言文</A><A href="/gushi/0/0/12/68/0/0/">辞赋</A></dd>
</dl>
<ul>
<li>
<A href="/shiren/266.html" target=_blank><IMG alt="李商隐" src="/uploadsabcd/userup/lishangyin.jpg"></A>
<strong><a href="/shi/582.html" target=_blank>嫦娥(嫦娥应悔偷灵药)</A></strong>
<span>作者:<A href="/shiren/266.html" target=_blank>李商隐</A></span>
<p>云母屏风烛影深, 长河渐落晓星沉。 嫦娥应悔偷灵药, 碧海青天夜夜心。</p>
</li>
<li>
<A href="/shiren/495.html" target=_blank><IMG alt="韦庄" src="/uploadsabcd/userup/weizhuang.jpg"></A>
<strong><a href="/shi/62468.html" target=_blank>金陵图(依旧烟笼十里堤)</A></strong>
<span>作者:<A href="/shiren/495.html" target=_blank>韦庄</A></span>
<p>江雨霏霏江草齐,六朝如梦鸟空啼。 无情最是台城柳,依旧烟笼十里堤。</p>
</li>
<li>
<A href="/shiren/495.html" target=_blank><IMG alt="韦庄" src="/uploadsabcd/userup/weizhuang.jpg"></A>
<strong><a href="/shi/6836.html" target=_blank>金陵图</A></strong>
<span>作者:<A href="/shiren/495.html" target=_blank>韦庄</A></span>
<p>谁谓伤心画不成,画人心逐世人情。 君看六幅南朝事,老木寒云满故城。</p>
</li>
<li>
<A href="/shiren/100.html" target=_blank><IMG alt="杜牧" src="/uploadsabcd/userup/dumu.jpg"></A>
<strong><a href="/shi/496.html" target=_blank>遣怀</A></strong>
<span>作者:<A href="/shiren/100.html" target=_blank>杜牧</A></span>
<p> 落魄江湖载酒行, 楚腰纤细掌中轻。 十年一觉扬州梦, 赢得青楼薄倖名。 </p>
</li>
<li>
<A href="/shiren/584.html" target=_blank><IMG alt="元稹" src="/uploadsabcd/userup/yuanzhen.jpg"></A>
<strong><a href="/shi/874.html" target=_blank>行宫</A></strong>
<span>作者:<A href="/shiren/584.html" target=_blank>元稹</A></span>
<p> 寥落古行宫,宫花寂寞红。 白头宫女在,闲坐说玄宗。 </p>
</li>
<li>
<A href="/shiren/494.html" target=_blank><IMG alt="韦应物" src="/uploadsabcd/userup/weiyingwu.jpg"></A>
<strong><a href="/shi/4832.html" target=_blank>寄李儋元锡</A></strong>
<span>作者:<A href="/shiren/494.html" target=_blank>韦应物</A></span>
<p>去年花里逢君别,今日花开已一年。 世事茫茫难自料,春愁黯黯独成眠。 身多疾病思田里,邑有流亡愧俸钱。 闻道欲来相问讯,西楼望月几回圆。</p>
</li>
<li>
<A href="/shiren/31.html" target=_blank><IMG alt="岑参" src="/uploadsabcd/userup/cencan.jpg"></A>
<strong><a href="/shi/4842.html" target=_blank>白雪歌送武判官归京</A></strong>
<span>作者:<A href="/shiren/31.html" target=_blank>岑参</A></span>
<p>北风卷地白草折,胡天八月即飞雪。 忽如一夜春风来,千树万树梨花开。 散入珠帘湿罗幕,狐裘不暖锦衾薄。 将军角弓不得控,都护铁衣冷难着。(难着 一作:犹著) 瀚海阑干百丈冰,愁云惨淡万里凝。 中军置酒饮归客,胡琴琵琶与羌笛。 纷纷暮雪下辕门,风掣红旗冻不翻。</p>
</li>
<li>
<A href="/shiren/501.html" target=_blank><IMG alt="温庭筠" src="/uploadsabcd/userup/wentingyun.jpg"></A>
<strong><a href="/shi/5507.html" target=_blank>瑶瑟怨</A></strong>
<span>作者:<A href="/shiren/501.html" target=_blank>温庭筠</A></span>
<p>冰簟银床梦不成,碧天如水夜云轻。 雁声远过潇湘去,十二楼中月自明。</p>
</li>
<li>
<A href="/shiren/242.html" target=_blank><IMG alt="李白" src="/uploadsabcd/userup/libai.jpg"></A>
<strong><a href="/shi/4.html" target=_blank>黄鹤楼送孟浩然之广陵</A></strong>
<span>作者:<A href="/shiren/242.html" target=_blank>李白</A></span>
<p> 故人西辞黄鹤楼,烟花三月下扬州。 孤帆远影碧空尽,唯见长江天际流。 </p>
</li>
<li>
<strong><a href="/shi/4752.html" target=_blank>哥舒歌</A></strong>
<span>作者:<A href="/shiren/522.html" target=_blank>西鄙人</A></span>
<p>北斗七星高,哥舒夜带刀。 至今窥牧马,不敢过临洮。</p>
</li>
<li>
<A href="/shiren/306.html" target=_blank><IMG alt="刘长卿" src="/uploadsabcd/userup/liuchangqing.jpg"></A>
<strong><a href="/shi/5571.html" target=_blank>新年作</A></strong>
<span>作者:<A href="/shiren/306.html" target=_blank>刘长卿</A></span>
<p>乡心新岁切,天畔独潸然。老至居人下,春归在客先。 岭猿同旦暮,江柳共风烟。已似长沙傅,从今又几年。</p>
</li>
</ul>
<div class="page">
<a href="/gushi/0/0/0/68/0/1/" class="on">1</a><a href="/gushi/0/0/0/68/0/2/">2</a>
<a href="/gushi/0/0/0/68/0/3/">3</a>
<a href="/gushi/0/0/0/68/0/4/">4</a>
<a href="/gushi/0/0/0/68/0/5/">5</a>
<a href="/gushi/0/0/0/68/0/6/">6</a>
<a href="/gushi/0/0/0/68/0/7/">7</a>
<a href="/gushi/0/0/0/68/0/8/">8</a>
<a href="/gushi/0/0/0/68/0/2/">下一页</a><a href="/gushi/0/0/0/68/0/33/">末页</a><span>共362首诗词</span>
</div>
</div>
<div class="right">
<div class="newsgg">
</div>
<div class="title"><h2>热门诗词</h2></div>
<ul class="boli">
<!-- -->
<li><a title="渔父(屈原既放)" href="/shi/2029.html" target="_blank" style="display: inline-block;">渔父(屈原既放) [楚辞(先秦及汉代)]</a></li><!-- -->
<li><a title="渔父词(其十五)" href="/shi/57083.html" target="_blank" style="display: inline-block;">渔父词(其十五) [赵构]</a></li><!-- -->
<li><a title="祝英台近(耕云)" href="/shi/56622.html" target="_blank" style="display: inline-block;">祝英台近(耕云) [张炎]</a></li><!-- -->
<li><a title="庆清朝(牡丹)" href="/shi/50449.html" target="_blank" style="display: inline-block;">庆清朝(牡丹) [曹勋]</a></li><!-- -->
<li><a title="鹧鸪天·建康上元作" href="/shi/49666.html" target="_blank" style="display: inline-block;">鹧鸪天·建康上元作 [赵鼎]</a></li><!-- -->
<li><a title="【越调】寨儿个" href="/shi/59730.html" target="_blank" style="display: inline-block;">【越调】寨儿个 [张可久]</a></li><!-- -->
<li><a title="西江月(夏日有感)" href="/shi/52141.html" target="_blank" style="display: inline-block;">西江月(夏日有感) [赵长卿]</a></li><!-- -->
<li><a title="沁园春(用履齐多景楼韵)" href="/shi/57855.html" target="_blank" style="display: inline-block;">沁园春(用履齐多景楼韵) [程公许]</a></li><!-- -->
<li><a title="倒犯(蓝桥)" href="/shi/58165.html" target="_blank" style="display: inline-block;">倒犯(蓝桥) [杨泽民]</a></li><!-- -->
<li><a title="谒金门·春半" href="/shi/6996.html" target="_blank" style="display: inline-block;">谒金门·春半 [朱淑真]</a></li><!-- -->
</ul>
<div class="title"><h2>古文典籍</h2></div>
<ul>
<li>「<a href="/bookindex/95.html">诗经</a>」</li>
<li>「<a href="/bookindex/2.html">论语</a>」</li>
<li>「<a href="/bookindex/11.html">史记</a>」</li>
<li>「<a href="/bookindex/13.html">周易</a>」</li>
<li>「<a href="/bookindex/4.html">易传</a>」</li>
<li>「<a href="/bookindex/10.html">左传</a>」</li>
<li>「<a href="/bookindex/9.html">大学</a>」</li>
<li>「<a href="/bookindex/7.html">中庸</a>」</li>
<li>「<a href="/bookindex/39.html">尚书</a>」</li>
<li>「<a href="/bookindex/23.html">礼记</a>」</li>
<li>「<a href="/bookindex/80.html">周礼</a>」</li>
<li>「<a href="/bookindex/25.html">孟子</a>」</li>
<li>「<a href="/bookindex/29.html">老子</a>」</li>
<li>「<a href="/bookindex/51.html">吴子</a>」</li>
<li>「<a href="/bookindex/49.html">荀子</a>」</li>
<li>「<a href="/bookindex/20.html">庄子</a>」</li>
<li>「<a href="/bookindex/37.html">墨子</a>」</li>
<li>「<a href="/bookindex/61.html">管子</a>」</li>
<li>「<a href="/bookindex/68.html">列子</a>」</li>
<li>「<a href="/bookindex/82.html">宋书</a>」</li>
<li>「<a href="/bookindex/81.html">汉书</a>」</li>
<li>「<a href="/bookindex/77.html">晋书</a>」</li>
<li>「<a href="/bookindex/40.html">素书</a>」</li>
<li>「<a href="/bookindex/70.html">仪礼</a>」</li>
<li>「<a href="/bookindex/90.html">周书</a>」</li>
<li>「<a href="/bookindex/93.html">梁书</a>」</li>
<li>「<a href="/bookindex/87.html">隋书</a>」</li>
<li>「<a href="/bookindex/84.html">陈书</a>」</li>
<li>「<a href="/bookindex/85.html">魏书</a>」</li>
<li>「<a href="/bookindex/54.html">孝经</a>」</li>
<li>「<a href="/bookindex/53.html">将苑</a>」</li>
<li>「<a href="/bookindex/83.html">南齐书</a>」</li>
<li>「<a href="/bookindex/86.html">北齐书</a>」</li>
<li>「<a href="/bookindex/91.html">新唐书</a>」</li>
<li>「<a href="/bookindex/55.html">后汉书</a>」</li>
<li>「<a href="/bookindex/88.html">南史</a>」</li>
<li>「<a href="/bookindex/71.html">司马法</a>」</li>
<li>「<a href="/bookindex/79.html">水经注</a>」</li>
<li>「<a href="/bookindex/78.html">商君书</a>」</li>
<li>「<a href="/bookindex/65.html">尉缭子</a>」</li>
<li>「<a href="/bookindex/89.html">北史</a>」</li>
<li>「<a href="/bookindex/73.html">逸周书</a>」</li>
<li>「<a href="/bookindex/94.html">旧唐书</a>」</li>
<li>「<a href="/bookindex/12.html">三字经</a>」</li>
<li>「<a href="/bookindex/47.html">淮南子</a>」</li>
<li>「<a href="/bookindex/46.html">六韬</a>」</li>
<li>「<a href="/bookindex/21.html">鬼谷子</a>」</li>
<li>「<a href="/bookindex/22.html">三国志</a>」</li>
<li>「<a href="/bookindex/1.html">千字文</a>」</li>
<li>「<a href="/bookindex/26.html">伤寒论</a>」</li>
<li>「<a href="/bookindex/48.html">反经</a>」</li>
<li>「<a href="/bookindex/32.html">百家姓</a>」</li>
<li>「<a href="/bookindex/27.html">菜根谭</a>」</li>
<li>「<a href="/bookindex/34.html">弟子规</a>」</li>
<li>「<a href="/bookindex/33.html">金刚经</a>」</li>
<li>「<a href="/bookindex/62.html">论衡</a>」</li>
<li>「<a href="/bookindex/17.html">韩非子</a>」</li>
<li>「<a href="/bookindex/16.html">山海经</a>」</li>
<li>「<a href="/bookindex/30.html">战国策</a>」</li>
<li>「<a href="/bookindex/41.html">地藏经</a>」</li>
<li>「<a href="/bookindex/19.html">冰鉴</a>」</li>
<li>「<a href="/bookindex/74.html">围炉夜话</a>」</li>
<li>「<a href="/bookindex/66.html">六祖坛经</a>」</li>
<li>「<a href="/bookindex/72.html">睡虎地秦墓竹简</a>」</li>
<li>「<a href="/bookindex/3.html">资治通鉴</a>」</li>
<li>「<a href="/bookindex/67.html">续资治通鉴</a>」</li>
<li>「<a href="/bookindex/28.html">梦溪笔谈</a>」</li>
<li>「<a href="/bookindex/92.html">旧五代史</a>」</li>
<li>「<a href="/bookindex/60.html">文昌孝经</a>」</li>
<li>「<a href="/bookindex/43.html">四十二章经</a>」</li>
<li>「<a href="/bookindex/15.html">吕氏春秋</a>」</li>
<li>「<a href="/bookindex/14.html">了凡四训</a>」</li>
<li>「<a href="/bookindex/5.html">三十六计</a>」</li>
<li>「<a href="/bookindex/63.html">徐霞客游记</a>」</li>
<li>「<a href="/bookindex/18.html">黄帝内经</a>」</li>
<li>「<a href="/bookindex/59.html">黄帝四经</a>」</li>
<li>「<a href="/bookindex/8.html">孙子兵法</a>」</li>
<li>「<a href="/bookindex/38.html">孙膑兵法</a>」</li>
<li>「<a href="/bookindex/24.html">本草纲目</a>」</li>
<li>「<a href="/bookindex/64.html">孔子家语</a>」</li>
<li>「<a href="/bookindex/6.html">世说新语</a>」</li>
<li>「<a href="/bookindex/56.html">贞观政要</a>」</li>
<li>「<a href="/bookindex/45.html">颜氏家训</a>」</li>
<li>「<a href="/bookindex/69.html">容斋随笔</a>」</li>
<li>「<a href="/bookindex/35.html">文心雕龙</a>」</li>
<li>「<a href="/bookindex/76.html">农桑辑要</a>」</li>
</ul>
<div class="title"><h2>热门名句</h2></div>
<ul class="boli">
<!-- -->故不登高山,不知天之高也;不临深溪,不知地之厚也; 霜轻未杀萋萋草,日暖初干漠漠沙 新人虽完好,未若故人姝 夭红过眼随荣谢,菊秀兰香自占春 恐凤靴,挑菜归来,万一灞桥相见
</ul>
<div class="title"><h2>热门成语</h2></div>
<ul class="boli">
<!-- -->
<li><a title="朱槃玉敦" href="/chengyu/12709.html" target="_blank" style="display: inline-block;">朱槃玉敦 [zhū pán yù duì]</a></li><!-- -->
<li><a title="国步艰难" href="/chengyu/4074.html" target="_blank" style="display: inline-block;">国步艰难 [guó bù jiān nán]</a></li><!-- -->
<li><a title="擂鼓鸣金" href="/chengyu/6147.html" target="_blank" style="display: inline-block;">擂鼓鸣金 [léi gǔ míng jīn]</a></li><!-- -->
<li><a title="方凿圆枘" href="/chengyu/2989.html" target="_blank" style="display: inline-block;">方凿圆枘 [fāng záo yuán ruì]</a></li><!-- -->
<li><a title="疾风知劲草" href="/chengyu/4836.html" target="_blank" style="display: inline-block;">疾风知劲草 [jí fēng zhī jìn cǎo]</a></li><!-- -->
</ul>
</div>
</div>
<div style="background:#725B47">
<div class="footer">
<p>Copyright © 2015 - 2020 www.nulog.cn, All Rights Reserved.<a href="http://www.nulog.cn">中国古诗文网</a> </p>
<div class="disno">
</div>
</div>
</div>
</body>
</html>
import requests
from bs4 import BeautifulSoup
def get_data(url, headers):
    '''
    Download a page and collect the paragraph text of each list item in its "left" block.

    :param url: address of the page to request
    :param headers: dict of request headers to send
    :return data: list with the text of the <p> element of every <li> found in
                  the first <ul> under the first <div class="left">
    '''
    # ***************** Begin ******************** #
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would turn the dict into a query string.
    resp = requests.get(url, headers=headers).text
    soup = BeautifulSoup(resp, 'lxml')
    items = soup.find('div', {'class': 'left'}).ul.find_all('li')
    data = [item.p.text for item in items]
    # ****************** end ********************* #
    return data
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8" />
<meta name="applicable-device" content="pc">
<title></title>
</head>
<body>
<div class="topbd">
<div class="header">
<div class="top">
<h1><a href="http://www.nulog.cn" title="中国古诗文网"></a></h1>
<ul class="nav">
<li id="i"><a href="http://www.nulog.cn">首页</a></li>
<li id="li1"><a href="/gushi/">古诗</a></li>
<li id="li2"><a href="/mingju/">名句</a></li>
<li id="li3"><a href="/shiren/">诗人</a></li>
<li id="li4"><a href="/book/">古籍</a></li>
<li id="li5"><a href="/shicilishi/">历史</a></li>
<li id="li6"><a href="/shicizhishi/">知识</a></li>
<li id="li7"><a href="/chengyu/">成语</a></li>
<li id="li8"><a href="/cydq/">成语大全</a></li>
<li id="m"><a href="http://so.nulog.cn/">查询</a></li>
</ul>
<div class="search">
<div class="lable">
<select class="select">
<option value="all">综合</option>
<option value="gushi">诗词</option>
<option value="author">作者</option>
<option value="mingju">诗句</option>
<option value="book">典籍</option>
<option value="chengyu">成语</option>
<option value="star">句首</option>
<option value="end">句尾</option>
</select>
</div>
<div style="float:left;">
<input type="text" name="ekey" placeholder="多个关键字用空格隔开,最多两个空格" class="search-text" id="word" autocomplete="off" x-webkit-speech="" speech="">
</div>
<div style="float:left;"><input type="submit" value=" " class="search-btn" id="search_btn"></div>
<div style="margin-left:0px; display:none" id="box"></div>
</div>
</div>
</div>
</div>
<div class="main">
<div class="left">
<!--四块开始-->
<div class="kk k1"></div>
<div class="kk k2"></div>
<div class="kk k3"></div>
<div class="kk k4"></div>
<!--四块结束-->
<h2>古诗大全</h2>
<dl class="type">
<dt>类型</dt>
<dd style="height:30px;" class="all" id="type">
<A href="/gushi/0/0/0/1/0/0/" title='写景的古诗词'>写景</A><A href="/gushi/0/0/0/2/0/0/" title='关于咏物的古诗'>咏物</A><A href="/gushi/0/0/0/3/0/0/" title='关于描写春天的诗句'>春天</A><A href="/gushi/0/0/0/4/0/0/" title='关于描写夏天的诗句'>夏天</A><A href="/gushi/0/0/0/5/0/0/" title='关于描写秋天的诗句'>秋天</A><A href="/gushi/0/0/0/6/0/0/" title='关于描写冬天的诗句'>冬天</A><A href="/gushi/0/0/0/7/0/0/" title='关于描写雨的诗句'>写雨</A><A href="/gushi/0/0/0/8/0/0/" title='关于描写雪的诗句'>写雪</A><A href="/gushi/0/0/0/9/0/0/" title='关于写风的诗句'>写风</A><A href="/gushi/0/0/0/10/0/0/" title='关于写风的古诗词'>写花</A><A href="/gushi/0/0/0/11/0/0/" title='关于描写梅花的诗词'>梅花</A><A href="/gushi/0/0/0/12/0/0/" title='关于描写荷花的诗词'>荷花</A><A href="/gushi/0/0/0/13/0/0/" title='关于描写菊花的诗句'>菊花</A><A href="/gushi/0/0/0/14/0/0/" title='关于描写柳树的诗句'>柳树</A><A href="/gushi/0/0/0/15/0/0/" title='关于月亮的诗词'>月亮</A><A href="/gushi/0/0/0/16/0/0/" title='关于描写山水的诗词'>山水</A><A href="/gushi/0/0/0/17/0/0/" title='关于描写山的诗句'>写山</A><A href="/gushi/0/0/0/18/0/0/" title='关于描写水的诗句'>写水</A><A href="/gushi/0/0/0/19/0/0/" title='关于描写长江的古诗词'>长江</A><A href="/gushi/0/0/0/20/0/0/" title='关于描写黄河的古诗词'>黄河</A><A href="/gushi/0/0/0/21/0/0/" title='关于描写儿童的古诗词'>儿童</A><A href="/gushi/0/0/0/22/0/0/" title='关于描写鸟的诗词'>写鸟</A><A href="/gushi/0/0/0/23/0/0/" title='关于描写马的诗词'>写马</A><A href="/gushi/0/0/0/24/0/0/" title='关于描写田园的诗词'>田园</A><A href="/gushi/0/0/0/25/0/0/" title='关于描写边塞的诗词'>边塞</A><A href="/gushi/0/0/0/26/0/0/" title='含有地名的诗词'>地名</A><A href="/gushi/0/0/0/27/0/0/" title='抒情的古诗词'>抒情</A><A href="/gushi/0/0/0/28/0/0/" title=' 关于描写爱国的诗词'>爱国</A><A href="/gushi/0/0/0/29/0/0/" title='关于离别的诗词'>离别</A><A href="/gushi/0/0/0/30/0/0/" title='关于送别的诗词'>送别</A><A href="/gushi/0/0/0/31/0/0/" title='关于思乡的诗词句'>思乡</A><A href="/gushi/0/0/0/32/0/0/" title='关于思念的诗词句'>思念</A><A href="/gushi/0/0/0/33/0/0/" title='关于描写、赞美爱情的诗词句'>爱情</A><A href="/gushi/0/0/0/34/0/0/" title=''>励志</A><A href="/gushi/0/0/0/35/0/0/" title=''>哲理</A><A href="/gushi/0/0/0/36/0/0/" title=''>闺怨</A><A href="/gushi/0/0/0/37/0/0/" title=''>悼亡</A><A 
href="/gushi/0/0/0/38/0/0/" title=''>写人</A><A href="/gushi/0/0/0/39/0/0/" title=''>老师</A><A href="/gushi/0/0/0/40/0/0/" title=''>母亲</A><A href="/gushi/0/0/0/41/0/0/" title=''>友情</A><A href="/gushi/0/0/0/42/0/0/" title=''>战争</A><A href="/gushi/0/0/0/43/0/0/" title=''>读书</A><A href="/gushi/0/0/0/44/0/0/" title=''>惜时</A><A href="/gushi/0/0/0/45/0/0/" title=''>婉约</A><A href="/gushi/0/0/0/46/0/0/" title=''>豪放</A><A href="/gushi/0/0/0/47/0/0/" title=''>诗经</A><A href="/gushi/0/0/0/48/0/0/" title=''>民谣</A><A href="/gushi/0/0/0/49/0/0/" title='关于节日的古诗词'>节日</A><A href="/gushi/0/0/0/50/0/0/" title='关于描写春节的古诗'>春节</A><A href="/gushi/0/0/0/51/0/0/" title='关于元宵节的古诗词'>元宵节</A><A href="/gushi/0/0/0/52/0/0/" title=''>寒食节</A><A href="/gushi/0/0/0/53/0/0/" title='关于清明节的古诗大全'>清明节</A><A href="/gushi/0/0/0/54/0/0/" title='关于端午节的古诗词大全'>端午节</A><A href="/gushi/0/0/0/55/0/0/" title='关于七夕节的古诗词大全'>七夕节</A><A href="/gushi/0/0/0/56/0/0/" title='关于中秋节的古诗大全'>中秋节</A><A href="/gushi/0/0/0/57/0/0/" title='关于重阳节的古诗大全'>重阳节</A><A href="/gushi/0/0/0/58/0/0/" title=''>忧国忧民</A><A href="/gushi/0/0/0/59/0/0/" title=''>咏史怀古</A><A href="/gushi/0/0/0/60/0/0/" title='宋词精选'>宋词精选</A><A href="/gushi/0/0/0/61/0/0/" title='小学古诗'>小学古诗</A><A href="/gushi/0/0/0/62/0/0/" title='初中古诗'>初中古诗</A><A href="/gushi/0/0/0/63/0/0/" title='高中古诗'>高中古诗</A><A href="/gushi/0/0/0/71/0/0/" title='古文观止全文|翻译|鉴赏'>古文观止</A><A href="/gushi/0/0/0/83/0/0/" title=''>辞赋精选</A><A href="/gushi/0/0/0/64/0/0/" title='小学文言文大全|翻译|鉴赏'>小学文言文</A><A href="/gushi/0/0/0/65/0/0/" title='初中文言文大全|翻译|鉴赏'>初中文言文</A><A href="/gushi/0/0/0/66/0/0/" title='高中文言文大全|翻译|鉴赏'>高中文言文</A><A href="/gushi/0/0/0/67/0/0/" title='古诗十九首'>古诗十九首</A><A href="/gushi/0/0/0/68/0/0/" title='唐诗三百首|唐诗三百首全集赏析'>唐诗三百首</A><A href="/gushi/0/0/0/69/0/0/" title='古诗三百首'>古诗三百首</A><A href="/gushi/0/0/0/70/0/0/" title='宋词三百首全集原文|翻译|赏析'>宋词三百首</A>
</dd>
<div class="more"><img onclick="clickType('type')" alt="展开" src="/skin/more.gif" width="10" height="20"></div>
<dt>朝代</dt>
<dd>
<A href="/gushi/0/0/0/68/0/0/"><span>不限</span></A><A href="/gushi/11/0/0/68/0/0/">先秦</A><A href="/gushi/3/0/0/68/0/0/">汉朝</A><A href="/gushi/10/0/0/68/0/0/">魏晋</A><A href="/gushi/6/0/0/68/0/0/">南北朝</A><A href="/gushi/9/0/0/68/0/0/">唐朝</A><A href="/gushi/15/0/0/68/0/0/">隋朝</A><A href="/gushi/7/0/0/68/0/0/">宋朝</A><A href="/gushi/12/0/0/68/0/0/">元朝</A><A href="/gushi/5/0/0/68/0/0/">明朝</A><A href="/gushi/8/0/0/68/0/0/">清朝</A><A href="/gushi/4/0/0/68/0/0/">近代</A><A href="/gushi/2/0/0/68/0/0/">当代</A></dd>
<dt>形式</dt>
<dd>
<A href="/gushi/0/0/0/68/0/0/"><span>不限</span></A><A href="/gushi/0/0/16/68/0/0/">诗</A><A href="/gushi/0/0/13/68/0/0/">词</A><A href="/gushi/0/0/14/68/0/0/">曲</A><A href="/gushi/0/0/17/68/0/0/">文言文</A><A href="/gushi/0/0/12/68/0/0/">辞赋</A></dd>
</dl>
<ul>
<li>
<A href="/shiren/266.html" target=_blank><IMG alt="李商隐" src="/uploadsabcd/userup/lishangyin.jpg"></A>
<strong><a href="/shi/582.html" target=_blank>嫦娥(嫦娥应悔偷灵药)</A></strong>
<span>作者:<A href="/shiren/266.html" target=_blank>李商隐</A></span>
<p>云母屏风烛影深, 长河渐落晓星沉。 嫦娥应悔偷灵药, 碧海青天夜夜心。</p>
</li>
<li>
<A href="/shiren/495.html" target=_blank><IMG alt="韦庄" src="/uploadsabcd/userup/weizhuang.jpg"></A>
<strong><a href="/shi/62468.html" target=_blank>金陵图(依旧烟笼十里堤)</A></strong>
<span>作者:<A href="/shiren/495.html" target=_blank>韦庄</A></span>
<p>江雨霏霏江草齐,六朝如梦鸟空啼。 无情最是台城柳,依旧烟笼十里堤。</p>
</li>
<li>
<A href="/shiren/495.html" target=_blank><IMG alt="韦庄" src="/uploadsabcd/userup/weizhuang.jpg"></A>
<strong><a href="/shi/6836.html" target=_blank>金陵图</A></strong>
<span>作者:<A href="/shiren/495.html" target=_blank>韦庄</A></span>
<p>谁谓伤心画不成,画人心逐世人情。 君看六幅南朝事,老木寒云满故城。</p>
</li>
<li>
<A href="/shiren/100.html" target=_blank><IMG alt="杜牧" src="/uploadsabcd/userup/dumu.jpg"></A>
<strong><a href="/shi/496.html" target=_blank>遣怀</A></strong>
<span>作者:<A href="/shiren/100.html" target=_blank>杜牧</A></span>
<p> 落魄江湖载酒行, 楚腰纤细掌中轻。 十年一觉扬州梦, 赢得青楼薄倖名。 </p>
</li>
<li>
<A href="/shiren/584.html" target=_blank><IMG alt="元稹" src="/uploadsabcd/userup/yuanzhen.jpg"></A>
<strong><a href="/shi/874.html" target=_blank>行宫</A></strong>
<span>作者:<A href="/shiren/584.html" target=_blank>元稹</A></span>
<p> 寥落古行宫,宫花寂寞红。 白头宫女在,闲坐说玄宗。 </p>
</li>
<li>
<A href="/shiren/494.html" target=_blank><IMG alt="韦应物" src="/uploadsabcd/userup/weiyingwu.jpg"></A>
<strong><a href="/shi/4832.html" target=_blank>寄李儋元锡</A></strong>
<span>作者:<A href="/shiren/494.html" target=_blank>韦应物</A></span>
<p>去年花里逢君别,今日花开已一年。 世事茫茫难自料,春愁黯黯独成眠。 身多疾病思田里,邑有流亡愧俸钱。 闻道欲来相问讯,西楼望月几回圆。</p>
</li>
<li>
<A href="/shiren/31.html" target=_blank><IMG alt="岑参" src="/uploadsabcd/userup/cencan.jpg"></A>
<strong><a href="/shi/4842.html" target=_blank>白雪歌送武判官归京</A></strong>
<span>作者:<A href="/shiren/31.html" target=_blank>岑参</A></span>
<p>北风卷地白草折,胡天八月即飞雪。 忽如一夜春风来,千树万树梨花开。 散入珠帘湿罗幕,狐裘不暖锦衾薄。 将军角弓不得控,都护铁衣冷难着。(难着 一作:犹著) 瀚海阑干百丈冰,愁云惨淡万里凝。 中军置酒饮归客,胡琴琵琶与羌笛。 纷纷暮雪下辕门,风掣红旗冻不翻。</p>
</li>
<li>
<A href="/shiren/501.html" target=_blank><IMG alt="温庭筠" src="/uploadsabcd/userup/wentingyun.jpg"></A>
<strong><a href="/shi/5507.html" target=_blank>瑶瑟怨</A></strong>
<span>作者:<A href="/shiren/501.html" target=_blank>温庭筠</A></span>
<p>冰簟银床梦不成,碧天如水夜云轻。 雁声远过潇湘去,十二楼中月自明。</p>
</li>
<li>
<A href="/shiren/242.html" target=_blank><IMG alt="李白" src="/uploadsabcd/userup/libai.jpg"></A>
<strong><a href="/shi/4.html" target=_blank>黄鹤楼送孟浩然之广陵</A></strong>
<span>作者:<A href="/shiren/242.html" target=_blank>李白</A></span>
<p> 故人西辞黄鹤楼,烟花三月下扬州。 孤帆远影碧空尽,唯见长江天际流。 </p>
</li>
<li>
<strong><a href="/shi/4752.html" target=_blank>哥舒歌</A></strong>
<span>作者:<A href="/shiren/522.html" target=_blank>西鄙人</A></span>
<p>北斗七星高,哥舒夜带刀。 至今窥牧马,不敢过临洮。</p>
</li>
<li>
<A href="/shiren/306.html" target=_blank><IMG alt="刘长卿" src="/uploadsabcd/userup/liuchangqing.jpg"></A>
<strong><a href="/shi/5571.html" target=_blank>新年作</A></strong>
<span>作者:<A href="/shiren/306.html" target=_blank>刘长卿</A></span>
<p>乡心新岁切,天畔独潸然。老至居人下,春归在客先。 岭猿同旦暮,江柳共风烟。已似长沙傅,从今又几年。</p>
</li>
</ul>
<div class="page">
<a href="/gushi/0/0/0/68/0/1/" class="on">1</a><a href="/gushi/0/0/0/68/0/2/">2</a>
<a href="/gushi/0/0/0/68/0/3/">3</a>
<a href="/gushi/0/0/0/68/0/4/">4</a>
<a href="/gushi/0/0/0/68/0/5/">5</a>
<a href="/gushi/0/0/0/68/0/6/">6</a>
<a href="/gushi/0/0/0/68/0/7/">7</a>
<a href="/gushi/0/0/0/68/0/8/">8</a>
<a href="/gushi/0/0/0/68/0/2/">下一页</a><a href="/gushi/0/0/0/68/0/33/">末页</a><span>共362首诗词</span>
</div>
</div>
<div class="right">
<div class="newsgg">
</div>
<div class="title"><h2>热门诗词</h2></div>
<ul class="boli">
<!-- -->
<li><a title="渔父(屈原既放)" href="/shi/2029.html" target="_blank" style="display: inline-block;">渔父(屈原既放) [楚辞(先秦及汉代)]</a></li><!-- -->
<li><a title="渔父词(其十五)" href="/shi/57083.html" target="_blank" style="display: inline-block;">渔父词(其十五) [赵构]</a></li><!-- -->
<li><a title="祝英台近(耕云)" href="/shi/56622.html" target="_blank" style="display: inline-block;">祝英台近(耕云) [张炎]</a></li><!-- -->
<li><a title="庆清朝(牡丹)" href="/shi/50449.html" target="_blank" style="display: inline-block;">庆清朝(牡丹) [曹勋]</a></li><!-- -->
<li><a title="鹧鸪天·建康上元作" href="/shi/49666.html" target="_blank" style="display: inline-block;">鹧鸪天·建康上元作 [赵鼎]</a></li><!-- -->
<li><a title="【越调】寨儿个" href="/shi/59730.html" target="_blank" style="display: inline-block;">【越调】寨儿个 [张可久]</a></li><!-- -->
<li><a title="西江月(夏日有感)" href="/shi/52141.html" target="_blank" style="display: inline-block;">西江月(夏日有感) [赵长卿]</a></li><!-- -->
<li><a title="沁园春(用履齐多景楼韵)" href="/shi/57855.html" target="_blank" style="display: inline-block;">沁园春(用履齐多景楼韵) [程公许]</a></li><!-- -->
<li><a title="倒犯(蓝桥)" href="/shi/58165.html" target="_blank" style="display: inline-block;">倒犯(蓝桥) [杨泽民]</a></li><!-- -->
<li><a title="谒金门·春半" href="/shi/6996.html" target="_blank" style="display: inline-block;">谒金门·春半 [朱淑真]</a></li><!-- -->
</ul>
<div class="title"><h2>古文典籍</h2></div>
<ul>
<li>「<a href="/bookindex/95.html">诗经</a>」</li>
<li>「<a href="/bookindex/2.html">论语</a>」</li>
<li>「<a href="/bookindex/11.html">史记</a>」</li>
<li>「<a href="/bookindex/13.html">周易</a>」</li>
<li>「<a href="/bookindex/4.html">易传</a>」</li>
<li>「<a href="/bookindex/10.html">左传</a>」</li>
<li>「<a href="/bookindex/9.html">大学</a>」</li>
<li>「<a href="/bookindex/7.html">中庸</a>」</li>
<li>「<a href="/bookindex/39.html">尚书</a>」</li>
<li>「<a href="/bookindex/23.html">礼记</a>」</li>
<li>「<a href="/bookindex/80.html">周礼</a>」</li>
<li>「<a href="/bookindex/25.html">孟子</a>」</li>
<li>「<a href="/bookindex/29.html">老子</a>」</li>
<li>「<a href="/bookindex/51.html">吴子</a>」</li>
<li>「<a href="/bookindex/49.html">荀子</a>」</li>
<li>「<a href="/bookindex/20.html">庄子</a>」</li>
<li>「<a href="/bookindex/37.html">墨子</a>」</li>
<li>「<a href="/bookindex/61.html">管子</a>」</li>
<li>「<a href="/bookindex/68.html">列子</a>」</li>
<li>「<a href="/bookindex/82.html">宋书</a>」</li>
<li>「<a href="/bookindex/81.html">汉书</a>」</li>
<li>「<a href="/bookindex/77.html">晋书</a>」</li>
<li>「<a href="/bookindex/40.html">素书</a>」</li>
<li>「<a href="/bookindex/70.html">仪礼</a>」</li>
<li>「<a href="/bookindex/90.html">周书</a>」</li>
<li>「<a href="/bookindex/93.html">梁书</a>」</li>
<li>「<a href="/bookindex/87.html">隋书</a>」</li>
<li>「<a href="/bookindex/84.html">陈书</a>」</li>
<li>「<a href="/bookindex/85.html">魏书</a>」</li>
<li>「<a href="/bookindex/54.html">孝经</a>」</li>
<li>「<a href="/bookindex/53.html">将苑</a>」</li>
<li>「<a href="/bookindex/83.html">南齐书</a>」</li>
<li>「<a href="/bookindex/86.html">北齐书</a>」</li>
<li>「<a href="/bookindex/91.html">新唐书</a>」</li>
<li>「<a href="/bookindex/55.html">后汉书</a>」</li>
<li>「<a href="/bookindex/88.html">南史</a>」</li>
<li>「<a href="/bookindex/71.html">司马法</a>」</li>
<li>「<a href="/bookindex/79.html">水经注</a>」</li>
<li>「<a href="/bookindex/78.html">商君书</a>」</li>
<li>「<a href="/bookindex/65.html">尉缭子</a>」</li>
<li>「<a href="/bookindex/89.html">北史</a>」</li>
<li>「<a href="/bookindex/73.html">逸周书</a>」</li>
<li>「<a href="/bookindex/94.html">旧唐书</a>」</li>
<li>「<a href="/bookindex/12.html">三字经</a>」</li>
<li>「<a href="/bookindex/47.html">淮南子</a>」</li>
<li>「<a href="/bookindex/46.html">六韬</a>」</li>
<li>「<a href="/bookindex/21.html">鬼谷子</a>」</li>
<li>「<a href="/bookindex/22.html">三国志</a>」</li>
<li>「<a href="/bookindex/1.html">千字文</a>」</li>
<li>「<a href="/bookindex/26.html">伤寒论</a>」</li>
<li>「<a href="/bookindex/48.html">反经</a>」</li>
<li>「<a href="/bookindex/32.html">百家姓</a>」</li>
<li>「<a href="/bookindex/27.html">菜根谭</a>」</li>
<li>「<a href="/bookindex/34.html">弟子规</a>」</li>
<li>「<a href="/bookindex/33.html">金刚经</a>」</li>
<li>「<a href="/bookindex/62.html">论衡</a>」</li>
<li>「<a href="/bookindex/17.html">韩非子</a>」</li>
<li>「<a href="/bookindex/16.html">山海经</a>」</li>
<li>「<a href="/bookindex/30.html">战国策</a>」</li>
<li>「<a href="/bookindex/41.html">地藏经</a>」</li>
<li>「<a href="/bookindex/19.html">冰鉴</a>」</li>
<li>「<a href="/bookindex/74.html">围炉夜话</a>」</li>
<li>「<a href="/bookindex/66.html">六祖坛经</a>」</li>
<li>「<a href="/bookindex/72.html">睡虎地秦墓竹简</a>」</li>
<li>「<a href="/bookindex/3.html">资治通鉴</a>」</li>
<li>「<a href="/bookindex/67.html">续资治通鉴</a>」</li>
<li>「<a href="/bookindex/28.html">梦溪笔谈</a>」</li>
<li>「<a href="/bookindex/92.html">旧五代史</a>」</li>
<li>「<a href="/bookindex/60.html">文昌孝经</a>」</li>
<li>「<a href="/bookindex/43.html">四十二章经</a>」</li>
<li>「<a href="/bookindex/15.html">吕氏春秋</a>」</li>
<li>「<a href="/bookindex/14.html">了凡四训</a>」</li>
<li>「<a href="/bookindex/5.html">三十六计</a>」</li>
<li>「<a href="/bookindex/63.html">徐霞客游记</a>」</li>
<li>「<a href="/bookindex/18.html">黄帝内经</a>」</li>
<li>「<a href="/bookindex/59.html">黄帝四经</a>」</li>
<li>「<a href="/bookindex/8.html">孙子兵法</a>」</li>
<li>「<a href="/bookindex/38.html">孙膑兵法</a>」</li>
<li>「<a href="/bookindex/24.html">本草纲目</a>」</li>
<li>「<a href="/bookindex/64.html">孔子家语</a>」</li>
<li>「<a href="/bookindex/6.html">世说新语</a>」</li>
<li>「<a href="/bookindex/56.html">贞观政要</a>」</li>
<li>「<a href="/bookindex/45.html">颜氏家训</a>」</li>
<li>「<a href="/bookindex/69.html">容斋随笔</a>」</li>
<li>「<a href="/bookindex/35.html">文心雕龙</a>」</li>
<li>「<a href="/bookindex/76.html">农桑辑要</a>」</li>
</ul>
<div class="title"><h2>热门名句</h2></div>
<ul class="boli">
<!-- -->故不登高山,不知天之高也;不临深溪,不知地之厚也; 霜轻未杀萋萋草,日暖初干漠漠沙 新人虽完好,未若故人姝 夭红过眼随荣谢,菊秀兰香自占春 恐凤靴,挑菜归来,万一灞桥相见
</ul>
<div class="title"><h2>热门成语</h2></div>
<ul class="boli">
<!-- -->
<li><a title="朱槃玉敦" href="/chengyu/12709.html" target="_blank" style="display: inline-block;">朱槃玉敦 [zhū pán yù duì]</a></li><!-- -->
<li><a title="国步艰难" href="/chengyu/4074.html" target="_blank" style="display: inline-block;">国步艰难 [guó bù jiān nán]</a></li><!-- -->
<li><a title="擂鼓鸣金" href="/chengyu/6147.html" target="_blank" style="display: inline-block;">擂鼓鸣金 [léi gǔ míng jīn]</a></li><!-- -->
<li><a title="方凿圆枘" href="/chengyu/2989.html" target="_blank" style="display: inline-block;">方凿圆枘 [fāng záo yuán ruì]</a></li><!-- -->
<li><a title="疾风知劲草" href="/chengyu/4836.html" target="_blank" style="display: inline-block;">疾风知劲草 [jí fēng zhī jìn cǎo]</a></li><!-- -->
</ul>
</div>
</div>
<div style="background:#725B47">
<div class="footer">
<p>Copyright © 2015 - 2020 www.nulog.cn, All Rights Reserved.<a href="http://www.nulog.cn">中国古诗文网</a> </p>
<div class="disno">
</div>
</div>
</div>
</body>
</html>
# -*- coding: utf-8 -*-
import urllib.request as req
import os
import hashlib
# 国防科技大学本科招生信息网中录取分数网页URL:
url = 'http://www.gotonudt.cn/site/gfkdbkzsxxw/lqfs/index.html' # 录取分数网页URL
def step1():
    """Download the page at the module-level ``url`` and save it to "nudt.txt".

    The raw response bytes are written unchanged (binary mode), so no
    decoding or re-encoding takes place.
    """
    #********** Begin *********#
    # 1. Read the whole page into ``data``; the context manager closes the
    #    HTTP response even if read() raises.
    with req.urlopen(url) as webpage:
        data = webpage.read()
    # 2. Write the bytes to "nudt.txt" in binary write mode; the context
    #    manager closes the file even if write() raises.
    with open("nudt.txt", 'wb') as outfile:
        outfile.write(data)
    #********** End **********#
# -*- coding: utf-8 -*-
import urllib.request as req
# URL of the admission-score page on the NUDT undergraduate admissions site:
url = 'http://www.gotonudt.cn/site/gfkdbkzsxxw/lqfs/index.html' # admission-score page URL
webpage = req.urlopen(url) # open the page as a file-like object
data = webpage.read() # read the whole response body in one call
data = data.decode('utf-8') # decode the bytes into a str (otherwise the later searches would need separate handling)
def step2():
    """Build the list of per-year score-page URLs for 2016 down to 2012.

    For each year the title text is located in the module-level ``data``
    string, and the characters from 80 before the match up to 39 before the
    match are taken as the link path, which is appended to the site root.

    NOTE(review): the fixed offsets assume the page layout never changes, and
    a failed ``find`` (-1) is not detected - confirm against the live page.

    Returns:
        list of str, one URL per year in the order 2016, 2015, ..., 2012.
    """
    #********** Begin *********#
    site_root = 'http://www.gotonudt.cn'
    urls = []
    for year in (2016, 2015, 2014, 2013, 2012):
        title_pos = data.find("国防科技大学%s年录取分数统计" %year)
        # Slice the link path out of the markup that precedes the title.
        link_path = data[title_pos-80:title_pos-39]
        urls.append(site_root + link_path)
    #********** End **********#
    return urls
# -*- coding: utf-8 -*-
import urllib.request as req
import re
# URL of the 2016 admission-score page on the NUDT undergraduate admissions site:
url = 'http://www.gotonudt.cn/site/gfkdbkzsxxw/lqfs/info/2017/717.html'
webpage = req.urlopen(url) # open the linked page
data = webpage.read() # read the page bytes
data = data.decode('utf-8') # decode the bytes into a str
# Grab every <table>...</table> element on the page.
# NOTE(review): the HTML tags inside this pattern (and the '&nbsp;' literal
# below) had been stripped from the published text, leaving r'' which only
# matches empty strings; they are restored here from the accompanying
# comments - confirm against the original exercise.
table = re.findall(r'<table.*?</table>', data, re.S)
firsttable = table[0] # keep only the first table on the page
# Data cleaning: drop '&nbsp;' entities, ideographic spaces (\u3000) and plain spaces.
firsttable = firsttable.replace('&nbsp;', '')
firsttable = firsttable.replace('\u3000', '')
firsttable = firsttable.replace(' ', '')
def step3():
    """Extract the score rows from the module-level ``firsttable`` markup.

    Each table row becomes a list of cell texts. The first three rows are
    skipped and the last cell of every remaining row is dropped.

    NOTE(review): the tag literals in the three patterns below were empty in
    the published text (stripped as HTML); they are restored from the
    surrounding comments, and '</span>' is presumed to be the cell-text
    terminator - confirm against the page source.

    Returns:
        list of lists of str, one inner list per remaining table row.
    """
    score = []
    #********** Begin *********#
    # 1. Collect every <tr>...</tr> row of the table.
    rows = re.findall(r'<tr.*?</tr>', firsttable, re.S)
    # 2. For each row, pull the text out of every <td> cell.
    scorelist = []
    for row in rows:
        items = []
        tds = re.findall(r'<td.*?>(.*?)</td>', row, re.S)
        for td in tds:
            rightindex = td.find('</span>')  # -1 means not found
            # The cell text starts right after the last '>' before the closing tag.
            leftindex = td[:rightindex].rfind('>')
            items.append(td[leftindex+1:rightindex])
        scorelist.append(items)
    # 3. Skip the first three rows and drop the trailing cell of each record.
    for record in scorelist[3:]:
        record.pop()
        score.append(record)
    #********** End **********#
    return score
爬虫进阶
第1关 单网页爬取
import urllib.request
import csv
import re
# ********** Begin ********** #
# Fetch the JD home page into memory and decode it (undecodable bytes are ignored).
data=urllib.request.urlopen("http://www.jd.com").read().decode("utf-8","ignore")
# Fetch the same page again and save it to a local file.
urllib.request.urlretrieve("http://www.jd.com",filename="./step1/京东.html")
# ********** End ********** #
# ********** Begin ********** #
# Regular expression used to pull the wanted text out of the page.
# NOTE(review): this pattern looks like it lost its surrounding HTML tags when
# the answer was published; it is kept verbatim because the original tags
# cannot be recovered from this file - confirm against the exercise.
pattern="(.*?) "
# re.S lets '.' match newlines, since the page content spans many lines.
title=set(re.compile(pattern,re.S).findall(data))
# Save the matches as a single CSV row. newline='' is what the csv module
# requires so that it controls the line endings itself.
with open("./step1/csv_file.csv", 'w', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(title)
# ********** End ********** #
第2关 网站爬取策略
from bs4 import BeautifulSoup
import requests
import re
#自定义队列类
class linkQuence:
    """FIFO queue of URLs still to visit plus a list of URLs already visited."""

    def __init__(self):
        # URLs that have already been visited.
        self.visted = []
        # URLs waiting to be visited; new entries go in at index 0 and are
        # taken from the end, which gives first-in-first-out order.
        self.unVisited = []

    def getVisitedUrl(self):
        """Return the list of visited URLs."""
        return self.visted

    def getUnvisitedUrl(self):
        """Return the list of pending URLs."""
        return self.unVisited

    def addVisitedUrl(self, url):
        """Record ``url`` as visited."""
        self.visted.append(url)

    def removeVisitedUrl(self, url):
        """Remove ``url`` from the visited list (ValueError if absent)."""
        self.visted.remove(url)

    def unVisitedUrlDeQuence(self):
        """Pop and return the oldest pending URL, or None when none is left."""
        try:
            return self.unVisited.pop()
        except IndexError:
            # Only an empty queue is expected here; anything else should surface.
            return None

    def addUnvisitedUrl(self, url):
        """Queue ``url`` unless it is empty, already visited or already queued."""
        if url != "" and url not in self.visted and url not in self.unVisited:
            self.unVisited.insert(0, url)

    def getVisitedUrlCount(self):
        """Return how many URLs have been visited."""
        return len(self.visted)

    def getUnvistedUrlCount(self):
        """Return how many URLs are still pending."""
        return len(self.unVisited)

    def unVisitedUrlsEnmpy(self):
        """Return True when no URL is pending."""
        return len(self.unVisited) == 0
class MyCrawler:
    """Depth-limited crawler that keeps its URL bookkeeping in a linkQuence."""

    def __init__(self, seeds):
        """Seed the pending queue from a single URL string or a list of URLs."""
        # Depth level currently being crawled.
        self.current_deepth = 1
        # Pending / visited URL bookkeeping.
        self.linkQuence = linkQuence()
        if isinstance(seeds, str):
            self.linkQuence.addUnvisitedUrl(seeds)
        if isinstance(seeds, list):
            for i in seeds:
                self.linkQuence.addUnvisitedUrl(i)
        print("Add the seeds url %s to the unvisited url list" % str(self.linkQuence.unVisited))

    def crawling(self, seeds, crawl_deepth):
        """Visit queued URLs level by level until ``crawl_deepth`` levels are done.

        ``seeds`` is accepted for interface compatibility but not read here;
        the queue was already seeded in ``__init__``.

        NOTE(review): the published text lost its indentation, so the nesting
        below is a reconstruction: each level drains the pending queue, then
        the links found on the last page visited in that level are queued for
        the next level - confirm against the original exercise.
        """
        # ********** Begin **********#
        while self.current_deepth <= crawl_deepth:
            # Start each level with no links so that an empty queue cannot
            # hit an unbound name or reuse the previous level's links.
            links = []
            while not self.linkQuence.unVisitedUrlsEnmpy():
                # Take the oldest pending URL.
                visitUrl = self.linkQuence.unVisitedUrlDeQuence()
                if visitUrl is None or visitUrl == "":
                    continue
                # Collect the hyperlinks found on that page.
                links = self.getHyperLinks(visitUrl)
                # Mark the URL as visited.
                self.linkQuence.addVisitedUrl(visitUrl)
            # Queue the newly found links for the next level.
            for link in links:
                self.linkQuence.addUnvisitedUrl(link)
            self.current_deepth += 1
        # ********** End **********#

    def getHyperLinks(self, url):
        """Return the href values on ``url``'s page that contain "http://"."""
        # ********** Begin **********#
        links = []
        data = self.getPageSource(url)
        soup = BeautifulSoup(data,'html.parser')
        # Candidate anchors: href starts with "http" or "/".
        a = soup.findAll("a", {"href": re.compile('^http|^/')})
        for i in a:
            if i["href"].find("http://") != -1:
                links.append(i["href"])
        return links
        # ********** End **********#

    def getPageSource(self, url):
        """Return the page text decoded as UTF-8, or '' if the fetch fails."""
        # ********** Begin **********#
        try:
            r = requests.get(url)
            r.raise_for_status()
            r.encoding = 'utf-8'
            return r.text
        except Exception:
            # Best effort: a page that cannot be fetched yields no links.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return ''
        # ********** End **********#
def main(seeds="http://www.baidu.com", crawl_deepth=3):
    """Crawl from ``seeds`` for ``crawl_deepth`` levels and return the visited URLs."""
    crawler = MyCrawler(seeds)
    crawler.crawling(seeds, crawl_deepth)
    visited_urls = crawler.linkQuence.getVisitedUrl()
    return visited_urls
第3关 爬取与反爬取进阶
import urllib.request
import re
import random
# Pool of User-Agent header values; UA() picks one of them at random.
uapools=[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
]
def UA():
    """Install a global urllib opener that sends a randomly chosen User-Agent.

    The value is drawn from the module-level ``uapools`` list, and the opener
    is installed process-wide, so later ``urllib.request.urlopen`` calls use it.
    """
    #********** Begin **********#
    chosen_agent = random.choice(uapools)
    opener = urllib.request.build_opener()
    opener.addheaders = [("User-Agent", chosen_agent)]
    urllib.request.install_opener(opener)
    #********** End **********#
def main(page):  # page is the page number (int)
    """Fetch one listing page and append the extracted text entries to a file.

    Args:
        page: page number used to build the listing URL.

    The matches are joined with newlines and appended to
    "./step4/content.txt" (UTF-8).
    """
    #********** Begin **********#
    UA()
    # Build the URL for the requested page number.
    thisurl="http://www.qiushibaike.com/text/page/"+str(page)+"/"
    data=urllib.request.urlopen(thisurl).read().decode("utf-8","ignore")
    # Extract the entry text from each <div class="content"> block.
    # NOTE(review): the tags in this pattern had been stripped from the
    # published text (leaving '.*?(.*?).*?', which captures only empty
    # strings); they are restored from the comment above - confirm the inner
    # <span> wrapper against the page source.
    pat='<div class="content">.*?<span>(.*?)</span>.*?</div>'
    rst=re.compile(pat,re.S).findall(data)
    with open("./step4/content.txt","a",encoding="utf-8") as f:
        f.write("\n".join(rst))
    #********** End **********#
Scrapy爬虫基础
第1关 Scrapy安装与项目创建
scrapy startproject HelloWorld
cd HelloWorld
scrapy genspider world www.baidu.com
第2关 Scrapy核心原理
# -*- coding: utf-8 -*-
import scrapy
class WorldSpider(scrapy.Spider):
    """Spider that saves the start page's raw body to a local file."""

    name = 'world'
    allowed_domains = ['www.baidu.com']
    start_urls = ['http://www.baidu.com/']

    def parse(self, response):
        """Write the response body bytes to "baidu.html"."""
        # ********** Begin *********#
        page_bytes = response.body
        with open('baidu.html', 'wb') as out:
            out.write(page_bytes)
        # ********** End *********#
Scrapy爬虫之网站图片爬取
第1关 【有多个文件】爬取网站实训图片的链接
文件一
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>花</title>
</head>
<body>
<div class="box">
<div>
<a href="/static/app1/imgs/1.png" target="_blank">
<img src="/static/app1/imgs/1.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/10.png" target="_blank">
<img src="/static/app1/imgs/10.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/11.png" target="_blank">
<img src="/static/app1/imgs/11.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/12.png" target="_blank">
<img src="/static/app1/imgs/12.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/13.png" target="_blank">
<img src="/static/app1/imgs/13.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/14.png" target="_blank">
<img src="/static/app1/imgs/14.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/15.png" target="_blank">
<img src="/static/app1/imgs/15.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/16.png" target="_blank">
<img src="/static/app1/imgs/16.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/17.png" target="_blank">
<img src="/static/app1/imgs/17.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/18.png" target="_blank">
<img src="/static/app1/imgs/18.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/19.png" target="_blank">
<img src="/static/app1/imgs/19.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/2.png" target="_blank">
<img src="/static/app1/imgs/2.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/20.png" target="_blank">
<img src="/static/app1/imgs/20.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/21.png" target="_blank">
<img src="/static/app1/imgs/21.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/22.png" target="_blank">
<img src="/static/app1/imgs/22.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/23.png" target="_blank">
<img src="/static/app1/imgs/23.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/24.png" target="_blank">
<img src="/static/app1/imgs/24.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/25.png" target="_blank">
<img src="/static/app1/imgs/25.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/3.png" target="_blank">
<img src="/static/app1/imgs/3.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/4.png" target="_blank">
<img src="/static/app1/imgs/4.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/5.png" target="_blank">
<img src="/static/app1/imgs/5.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/6.png" target="_blank">
<img src="/static/app1/imgs/6.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/7.png" target="_blank">
<img src="/static/app1/imgs/7.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/8.png" target="_blank">
<img src="/static/app1/imgs/8.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/9.png" target="_blank">
<img src="/static/app1/imgs/9.png" alt="未显示">
</a>
</div>
</div>
</body>
</html>
文件二
# -*- coding: utf-8 -*-
import scrapy
class EduspiderSpider(scrapy.Spider):
    """Spider that records the image ``src`` selection of the gallery page."""

    name = 'eduSpider'
    allowed_domains = ['127.0.0.1']
    start_urls = ['http://127.0.0.1:8080/imgs/']

    def parse(self, response):
        """Write the formatted xpath result for the image sources to "images.txt".

        NOTE(review): the xpath result is formatted directly, without an
        extract step, so the file receives its string form rather than one
        URL per line - confirm this is what the grader expects.
        """
        #********** Begin **********#
        with open('images.txt', 'w') as out:
            src_selection = response.xpath("//div[@class='box']/div/a/img/@src")
            line = "{}\n".format(src_selection)
            out.write(line)
        #********** End **********#
第2关【有多个文件】 爬取网站实训图片并下载
文件一
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>花</title>
</head>
<body>
<div class="box">
<div>
<a href="/static/app1/imgs/1.png" target="_blank">
<img src="/static/app1/imgs/1.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/10.png" target="_blank">
<img src="/static/app1/imgs/10.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/11.png" target="_blank">
<img src="/static/app1/imgs/11.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/12.png" target="_blank">
<img src="/static/app1/imgs/12.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/13.png" target="_blank">
<img src="/static/app1/imgs/13.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/14.png" target="_blank">
<img src="/static/app1/imgs/14.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/15.png" target="_blank">
<img src="/static/app1/imgs/15.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/16.png" target="_blank">
<img src="/static/app1/imgs/16.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/17.png" target="_blank">
<img src="/static/app1/imgs/17.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/18.png" target="_blank">
<img src="/static/app1/imgs/18.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/19.png" target="_blank">
<img src="/static/app1/imgs/19.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/2.png" target="_blank">
<img src="/static/app1/imgs/2.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/20.png" target="_blank">
<img src="/static/app1/imgs/20.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/21.png" target="_blank">
<img src="/static/app1/imgs/21.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/22.png" target="_blank">
<img src="/static/app1/imgs/22.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/23.png" target="_blank">
<img src="/static/app1/imgs/23.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/24.png" target="_blank">
<img src="/static/app1/imgs/24.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/25.png" target="_blank">
<img src="/static/app1/imgs/25.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/3.png" target="_blank">
<img src="/static/app1/imgs/3.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/4.png" target="_blank">
<img src="/static/app1/imgs/4.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/5.png" target="_blank">
<img src="/static/app1/imgs/5.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/6.png" target="_blank">
<img src="/static/app1/imgs/6.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/7.png" target="_blank">
<img src="/static/app1/imgs/7.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/8.png" target="_blank">
<img src="/static/app1/imgs/8.png" alt="未显示">
</a>
</div>
<div>
<a href="/static/app1/imgs/9.png" target="_blank">
<img src="/static/app1/imgs/9.png" alt="未显示">
</a>
</div>
</div>
</body>
</html>
文件二
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class ImgprojectItem(scrapy.Item):
    """Item describing one image to download."""

    #********** Begin **********#
    # Full URL of the image.
    img_urls = scrapy.Field()
    # Information stored about the image.
    images = scrapy.Field()
    #********** End **********#
文件三
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os,requests
from ImgProject import settings
class ImgprojectPipeline(object):
    """Pipeline that downloads each item's image into settings.IMAGES_STORE."""

    def process_item(self, item, spider):
        """Download ``item['img_urls']`` and save it as ``<images>.jpg``.

        Returns:
            The same item, so that any later pipeline stage still receives it.
        """
        #********** Begin **********#
        # Target folder comes from the project settings.
        dir_path ='{}'.format(settings.IMAGES_STORE)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dir_path, exist_ok=True)
        name = item['images']
        img_url = item['img_urls']
        # Final path of the saved image.
        img_path = dir_path +'/'+ name +'.jpg'
        img = requests.get(img_url,headers=settings.DEFAULT_REQUEST_HEADERS)
        with open(img_path,'wb') as file:
            file.write(img.content)
        # A pipeline stage has to hand the item on; the original fell off the
        # end and returned None.
        return item
        #********** End **********#
文件四
# -*- coding: utf-8 -*-
import scrapy
from ImgProject.items import ImgprojectItem
class ImgspierSpider(scrapy.Spider):
    """Spider that yields one ImgprojectItem per gallery image."""

    name = 'imgspier'
    allowed_domains = ['127.0.0.1']
    start_urls = ['http://127.0.0.1:8080/imgs/']

    def parse(self, response):
        """Yield an item holding each image's absolute URL and base name."""
        #********** Begin **********#
        src_paths = response.xpath("//div[@class='box']/div/a/img/@src").extract()
        for src_path in src_paths:
            # File name without directory and without the extension.
            file_name = src_path.split('/')[-1]
            base_name = file_name.split('.')[0]
            item = ImgprojectItem()
            item['img_urls'] = "http://127.0.0.1:8080" + src_path
            item['images'] = base_name
            yield item
        #********** End **********#
Scrapy爬虫之热门网站数据爬取
第1关【有多个文件】 猫眼电影排行TOP100信息爬取
文件一step1/maoyan/maoyan/items.py
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class MaoyanItem(scrapy.Item):
    """Item holding the fields scraped for one movie."""

    #********** Begin **********#
    # Movie title.
    name = scrapy.Field()
    # Cast text (the field name is spelled "starts" throughout the project).
    starts = scrapy.Field()
    # Release-time text.
    releasetime = scrapy.Field()
    # Score text.
    score = scrapy.Field()
    #********** End **********#
文件二step1/maoyan/maoyan/pipelines.py
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
class MaoyanPipeline(object):
    """Pipeline that stores each movie item in the MySQL table ``mymovies``."""

    def process_item(self, item, spider):
        """Insert one movie row, creating the table first if needed.

        A new connection is opened and closed for every item.

        Returns:
            The same item, unchanged.
        """
        #********** Begin **********#
        # Read the fields first so a missing key cannot leave a connection open.
        name = item['name']
        starts = item['starts']
        releasetime = item['releasetime']
        score = item['score']
        # 1. Connect to the database.
        connection = pymysql.connect(
            host='localhost',   # local database server
            port=3306,          # database port
            user='root',        # MySQL user name
            passwd='123123',    # MySQL password
            db='mydb',          # database name
            charset='utf8',     # connection character set
        )
        # 2. Create the table if needed, insert the row, then close the connection.
        try:
            with connection.cursor() as cursor:
                sql1 = 'Create Table If Not Exists mymovies(name varchar(50) CHARACTER SET utf8 NOT NULL,starts text CHARACTER SET utf8 NOT NULL,releasetime varchar(50) CHARACTER SET utf8 DEFAULT NULL,score varchar(20) CHARACTER SET utf8 NOT NULL,PRIMARY KEY(name))'
                # Let the driver escape the values: scraped text containing a
                # quote would otherwise break (or inject into) the statement.
                sql2 = 'Insert into mymovies values (%s,%s,%s,%s)'
                cursor.execute(sql1)
                cursor.execute(sql2, (name, starts, releasetime, score))
            # Commit the inserted row.
            connection.commit()
        finally:
            # Always release the connection.
            connection.close()
        return item
        #********** End **********#
文件三
# -*- coding: utf-8 -*-
import scrapy
from maoyan.items import MaoyanItem
class MoviesSpider(scrapy.Spider):
    """Spider that walks the board pages ten entries at a time."""

    name = 'movies'
    allowed_domains = ['127.0.0.1']
    offset = 0
    url = "http://127.0.0.1:8080/board/4?offset="
    #********** Begin **********#
    # 1. Build the first URL from the offset so later pages can be derived.
    start_urls = [url + str(offset)]

    # 2. Parse one board page.
    def parse(self, response):
        """Yield one MaoyanItem per movie, then request the next page.

        NOTE(review): the published text lost its indentation; the paging
        step is placed after the loop, as its comment ("at the end of the
        function") indicates.
        """
        movies = response.xpath("//div[ @class ='board-item-content']")
        for each in movies:
            # A fresh item per movie: reusing one instance would have every
            # yielded item alias the same, repeatedly overwritten object.
            item = MaoyanItem()
            # Movie title.
            name = each.xpath(".//div/p/a/text()").extract()[0]
            # Cast.
            starts = each.xpath(".//div[1]/p/text()").extract()[0]
            # Release time.
            releasetime = each.xpath(".//div[1]/p[3]/text()").extract()[0]
            score1 = each.xpath(".//div[2]/p/i[1]/text()").extract()[0]
            score2 = each.xpath(".//div[2]/p/i[2]/text()").extract()[0]
            # The score is split over two elements; join the two parts.
            score = score1 + score2
            item['name'] = name
            item['starts'] = starts
            item['releasetime'] = releasetime
            item['score'] = score
            yield item
        # 3. Advance the offset by 10 and request the next page, up to offset 90.
        if self.offset < 90:
            self.offset += 10
            yield scrapy.Request(self.url+str(self.offset), callback=self.parse)
    #********** End **********#
第2关【有多个文件】 小说网站玄幻分类第一页小说爬取
文件一
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
#存放全部小说信息
class NovelprojectItem(scrapy.Item):
    """Item holding the summary information of one novel."""

    #********** Begin **********#
    # Novel title.
    name = scrapy.Field()
    # Author name.
    author = scrapy.Field()
    # Serialisation state.
    state = scrapy.Field()
    # Description text.
    description = scrapy.Field()
    #********** End **********#
#单独存放小说章节
class NovelprojectItem2(scrapy.Item):
    """Item holding one chapter title together with its target table name."""

    #********** Begin **********#
    # Name of the table the chapter title is stored in.
    tablename = scrapy.Field()
    # Chapter title.
    title = scrapy.Field()
    #********** End **********#
文件二
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
from NovelProject.items import NovelprojectItem,NovelprojectItem2
class NovelprojectPipeline(object):
    """Pipeline that stores novel summaries and chapter titles in MySQL."""

    def process_item(self, item, spider):
        """Persist ``item`` according to its type and return it.

        * NovelprojectItem  -> one row in table ``novel``.
        * NovelprojectItem2 -> one row in a table named after ``tablename``.
        * anything else     -> returned untouched, without opening a
          connection (the original leaked the connection and returned None).

        A new connection is opened and closed for every stored item.
        """
        #********** Begin **********#
        is_novel = isinstance(item, NovelprojectItem)
        is_chapter = isinstance(item, NovelprojectItem2)
        if not (is_novel or is_chapter):
            return item
        # 1. Connect to the local database mydb.
        connection = pymysql.connect(
            host='localhost',   # local database server
            port = 3306,        # port number
            user='root',        # MySQL user name
            passwd='123123',    # MySQL password
            db='mydb',          # database name
            charset='utf8',     # connection character set
        )
        try:
            with connection.cursor() as cursor:
                if is_novel:
                    # 2. Novel summary: values are bound by the driver so that
                    #    quotes in scraped text cannot break the statement.
                    sql1 = 'Create Table If Not Exists novel(name varchar(20) CHARACTER SET utf8 NOT NULL,author varchar(10) CHARACTER SET utf8,state varchar(20) CHARACTER SET utf8,description text CHARACTER SET utf8,PRIMARY KEY (name))'
                    sql2 = 'Insert into novel values (%s,%s,%s,%s)'
                    cursor.execute(sql1)
                    cursor.execute(sql2, (item['name'], item['author'],
                                          item['state'], item['description']))
                else:
                    # 3. Chapter title: a table name cannot be a bound
                    #    parameter, so quote it as an identifier and double
                    #    any backtick it contains.
                    table = '`%s`' % str(item['tablename']).replace('`', '``')
                    sql3 = 'Create Table If Not Exists %s(title varchar(20) CHARACTER SET utf8 NOT NULL,PRIMARY KEY (title))' % table
                    # The insert goes through the driver's %-formatting, so a
                    # literal '%' in the table name has to be doubled first.
                    sql4 = 'Insert into %s values (%%s)' % table.replace('%', '%%')
                    cursor.execute(sql3)
                    cursor.execute(sql4, (item['title'],))
            # Commit the inserted row.
            connection.commit()
        finally:
            # Always release the connection.
            connection.close()
        return item
        #********** End **********#
文件三
# -*- coding: utf-8 -*-
import scrapy
import re
from scrapy.http import Request
from NovelProject.items import NovelprojectItem
from NovelProject.items import NovelprojectItem2
class NovelSpider(scrapy.Spider):
    """Spider that follows list page -> book page -> chapter index."""

    name = 'novel'
    allowed_domains = ['127.0.0.1']
    start_urls = ['http://127.0.0.1:8000/list/1_1.html']  # first page of the category list
    #********** Begin **********#

    # 1. Collect the per-book URLs from the list page.
    def parse(self, response):
        """Request the book pages of the first three books on the list."""
        book_urls = response.xpath('//li/a[@class="l mr10"]/@href').extract()
        three_book_urls = book_urls[0:3]  # only the first three books
        for book_url in three_book_urls:
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            yield Request(response.urljoin(book_url), callback=self.parse_read)

    # 2. Read the book summary, yield it, then follow the reader link.
    def parse_read(self, response):
        """Yield the book's summary item and request its chapter index."""
        item = NovelprojectItem()
        # Novel title.
        name = response.xpath('//div[@class="b-info"]/h1/text()').extract_first()
        # Novel description.
        description = response.xpath('//div[@class="infoDetail"]/div/text()').extract_first()
        # Serialisation state.
        state = response.xpath('//div[@class="bookDetail"]/dl[1]/dd/text()').extract_first()
        # Author name.
        author = response.xpath('//div[@class="bookDetail"]/dl[2]/dd/text()').extract_first()
        item['name'] = name
        item['description'] = description
        item['state'] = state
        item['author'] = author
        yield item
        # Follow the reader link to the chapter index; skip the request when
        # the page has no such link instead of failing with IndexError.
        read_url = response.xpath('//a[@class="reader"]/@href').extract_first()
        if read_url:
            yield Request(response.urljoin(read_url), callback=self.parse_info)

    # 3. Yield every chapter title found on the chapter index.
    def parse_info(self, response):
        """Yield one NovelprojectItem2 per chapter title on the index page."""
        tablename = response.xpath('//div[@class="main-index"]/a[3]/text()').extract_first()
        titles = response.xpath('//div[@class="clearfix dirconone"]/li')
        for each in titles:
            title = each.xpath('.//a/text()').extract_first()
            # A fresh item per chapter: reusing one instance would have every
            # yielded item alias the same, repeatedly overwritten object.
            item = NovelprojectItem2()
            item['tablename'] = tablename
            item['title'] = title
            yield item
    #********** End **********#
你可能感兴趣的:(头歌学习心得,单元测试)