提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
本文以一个用 TypeScript 编写的简单爬虫为例: 先给出抓取、解析、存储全部写在 Crowller 类中的初始版本, 再把解析逻辑抽离到独立的 DellAnalyzer 类并通过构造函数传入 Crowller, 最后把 DellAnalyzer 改写为单例。
crowller.ts (初始版本: 抓取、解析、存储都写在 Crowller 类中)
// lilied
// html获取模块
import superagent from 'superagent'
// html解析模块
import { load } from 'cheerio'
import fs from 'fs'
import path from 'path'
/** One person card scraped from the listing page. */
interface PersonInfo {
  /** `src` attribute of the card's image. */
  img_url: string
  /** `alt` attribute of the card's image. */
  name: string
  /** `href` attribute of the link wrapping the image. */
  link: string
  /** Text that follows the first `:` inside the card's `.dianji` element. */
  store: string
}

/**
 * Accumulated crawl history as stored in the JSON data file: every key
 * (the millisecond timestamp of a run) maps to the people captured by that run.
 */
type Result = Record<string, PersonInfo[]>
/**
 * Fetches a page, extracts the person cards from its HTML and appends them to
 * a JSON file on disk.
 *
 * The whole pipeline is started from the constructor, so creating an instance
 * is enough to trigger one crawl.
 */
class Crowller {
  /** Absolute path of the JSON file that accumulates crawl results. */
  fullPath = path.resolve(__dirname, '../data/data.json')

  /**
   * @param _url Address of the page to crawl.
   */
  constructor(private _url: string) {
    // A constructor cannot await, so the pipeline's rejection is handled here
    // instead of being left as an unhandled promise rejection.
    this.initSprderProcess().catch((err: unknown) => {
      console.error(err)
      process.exitCode = 1
    })
  }

  /**
   * Runs the crawl: download, parse, merge with the stored history, save.
   * Rejects if any of the steps fails.
   */
  async initSprderProcess(): Promise<void> {
    const html = await this.getWebHtml(this._url)
    const personList = this.parseHtml(html)
    const resultStr = this.generateJsonContent(personList)
    await this.storeData(resultStr)
  }

  /**
   * Downloads a page.
   *
   * @param url Address to request with HTTP GET.
   * @returns The response body as text.
   */
  async getWebHtml(url: string): Promise<string> {
    const response = await superagent.get(url)
    return response.text
  }

  /**
   * Extracts one record per `.rw_col` element of the page.
   *
   * @param text HTML source of the page.
   * @returns The extracted records; attributes or text that are missing
   *          become empty strings.
   */
  parseHtml(text: string): PersonInfo[] {
    const personList: PersonInfo[] = []
    const $ = load(text)
    // `.each` rather than `.map`: the callback only collects into personList
    // and produces no mapped value.
    $('.rw_col').each((_index, element) => {
      const card = $(element)
      const image = card.find('div.rw_img > a > img')
      personList.push({
        img_url: image.attr('src') ?? '',
        name: image.attr('alt') ?? '',
        link: card.find('div.rw_img > a').attr('href') ?? '',
        // Keep only what follows the first ':' of the element's text.
        store: card.find('.dianji').text().split(':')[1] ?? ''
      })
    })
    return personList
  }

  /**
   * Merges freshly crawled records into the history stored at `fullPath`.
   *
   * @param newData Records produced by the current run.
   * @returns The JSON text of the history with `newData` added under the
   *          current millisecond timestamp.
   * @throws SyntaxError if the existing file holds non-empty invalid JSON.
   */
  generateJsonContent(newData: PersonInfo[]): string {
    let result: Result = {}
    if (fs.existsSync(this.fullPath)) {
      const saved = fs.readFileSync(this.fullPath, 'utf8')
      // An empty file is not valid JSON; treat it as "no history yet"
      // instead of failing the run.
      if (saved.trim() !== '') {
        // NOTE(review): the parsed content is trusted to match Result; it is
        // not validated.
        result = JSON.parse(saved)
      }
    }
    result[Date.now()] = newData
    return JSON.stringify(result)
  }

  /**
   * Writes `data` to `fullPath`, creating the parent directory when needed.
   *
   * @param data Text to write.
   * @returns A promise that resolves once the file is written and rejects on
   *          any file-system error.
   */
  async storeData(data: string): Promise<void> {
    await fs.promises.mkdir(path.dirname(this.fullPath), { recursive: true })
    await fs.promises.writeFile(this.fullPath, data)
    console.log('数据写入成功');
  }
}
/**
 * Entry point: constructing a Crowller starts the crawl of the target page,
 * so the instance itself does not need to be kept.
 */
function main(): void {
  const URL = 'http://www.sextp.com/jpmt/'
  new Crowller(URL)
}

main()
crowller.ts (重构后: 解析逻辑抽离到 DellAnalyzer, 通过构造函数传入)
// lilied
// html获取模块
import superagent from 'superagent'
import path from 'path'
import fs from 'fs'
import DellAnalyzer from './DellAnalyzer'
/** Contract for the page-specific parsing step that Crowller delegates to. */
export interface Analyzer {
  /**
   * @param html HTML text of the fetched page.
   * @param fullPath Path of the data file, as passed by Crowller.
   * @returns The string that Crowller then writes to its data file.
   */
  analyze(html: string, fullPath: string): string
}
/**
 * Fetches a page, hands its HTML to the injected analyzer and writes the
 * analyzer's output to a JSON file on disk.
 *
 * The whole pipeline is started from the constructor, so creating an instance
 * is enough to trigger one crawl.
 */
class Crowller {
  /** Absolute path of the JSON file the analyzer output is written to. */
  fullPath = path.resolve(__dirname, '../data/data.json')

  /**
   * @param _url Address of the page to crawl.
   * @param _analyzer Turns the downloaded HTML into the text to be stored.
   */
  constructor(private _url: string, private _analyzer: Analyzer) {
    // A constructor cannot await, so the pipeline's rejection is handled here
    // instead of being left as an unhandled promise rejection.
    this.initSprderProcess().catch((err: unknown) => {
      console.error(err)
      process.exitCode = 1
    })
  }

  /**
   * Runs the crawl: download, analyze, save.
   * Rejects if any of the steps fails.
   */
  async initSprderProcess(): Promise<void> {
    const html = await this.getWebHtml(this._url)
    const resultStr = this._analyzer.analyze(html, this.fullPath)
    await this.storeData(resultStr)
  }

  /**
   * Downloads a page.
   *
   * @param url Address to request with HTTP GET.
   * @returns The response body as text.
   */
  async getWebHtml(url: string): Promise<string> {
    const response = await superagent.get(url)
    return response.text
  }

  /**
   * Writes `data` to `fullPath`, creating the parent directory when needed.
   *
   * @param data Text to write.
   * @returns A promise that resolves once the file is written and rejects on
   *          any file-system error.
   */
  async storeData(data: string): Promise<void> {
    await fs.promises.mkdir(path.dirname(this.fullPath), { recursive: true })
    await fs.promises.writeFile(this.fullPath, data)
    console.log('数据写入成功');
  }
}
/**
 * Entry point: builds the analyzer and hands it, together with the target
 * address, to a new Crowller, whose constructor starts the crawl.
 */
function main(): void {
  const targetUrl = 'http://www.sextp.com/jpmt/'
  new Crowller(targetUrl, new DellAnalyzer())
}

main()
DellAnalyzer.ts (从 Crowller 中抽离出的解析类)
// html解析模块
import { load } from 'cheerio'
import fs from 'fs'
import { Analyzer } from './crowller'
/** One person card scraped from the listing page. */
interface PersonInfo {
  /** `src` attribute of the card's image. */
  img_url: string
  /** `alt` attribute of the card's image. */
  name: string
  /** `href` attribute of the link wrapping the image. */
  link: string
  /** Text that follows the first `:` inside the card's `.dianji` element. */
  store: string
}

/**
 * Accumulated crawl history as stored in the JSON data file: every key
 * (the millisecond timestamp of a run) maps to the people captured by that run.
 */
type Result = Record<string, PersonInfo[]>
/**
 * Analyzer for the crawled listing page: extracts the person cards from the
 * HTML and merges them into the history kept in the JSON data file.
 */
export default class DellAnalyzer implements Analyzer {
  /**
   * Extracts one record per `.rw_col` element of the page.
   *
   * @param text HTML source of the page.
   * @returns The extracted records; attributes or text that are missing
   *          become empty strings.
   */
  private parseHtml(text: string): PersonInfo[] {
    const personList: PersonInfo[] = []
    const $ = load(text)
    // `.each` rather than `.map`: the callback only collects into personList
    // and produces no mapped value.
    $('.rw_col').each((_index, element) => {
      const card = $(element)
      const image = card.find('div.rw_img > a > img')
      personList.push({
        img_url: image.attr('src') ?? '',
        name: image.attr('alt') ?? '',
        link: card.find('div.rw_img > a').attr('href') ?? '',
        // Keep only what follows the first ':' of the element's text.
        store: card.find('.dianji').text().split(':')[1] ?? ''
      })
    })
    return personList
  }

  /**
   * Merges freshly crawled records into the history stored at `fullPath`.
   *
   * @param newData Records produced by the current run.
   * @param fullPath Path of the JSON file holding earlier runs, if any.
   * @returns The JSON text of the history with `newData` added under the
   *          current millisecond timestamp.
   * @throws SyntaxError if the existing file holds non-empty invalid JSON.
   */
  private generateJsonContent(newData: PersonInfo[], fullPath: string): string {
    let result: Result = {}
    if (fs.existsSync(fullPath)) {
      const saved = fs.readFileSync(fullPath, 'utf8')
      // An empty file is not valid JSON; treat it as "no history yet"
      // instead of failing the run.
      if (saved.trim() !== '') {
        // NOTE(review): the parsed content is trusted to match Result; it is
        // not validated.
        result = JSON.parse(saved)
      }
    }
    result[Date.now()] = newData
    return JSON.stringify(result)
  }

  /**
   * Parses the page and returns the updated history as JSON text.
   *
   * @param html HTML source of the fetched page.
   * @param fullPath Path of the JSON file holding earlier runs, if any.
   * @returns JSON text containing the earlier runs plus the current one.
   */
  public analyze(html: string, fullPath: string): string {
    const personList = this.parseHtml(html)
    return this.generateJsonContent(personList, fullPath)
  }
}
crowller.ts (改为通过 DellAnalyzer.getInstance() 获取单例)
// lilied
// html获取模块
import superagent from 'superagent'
import path from 'path'
import fs from 'fs'
import DellAnalyzer from './DellAnalyzer'
/** Contract for the page-specific parsing step that Crowller delegates to. */
export interface Analyzer {
  /**
   * @param html HTML text of the fetched page.
   * @param fullPath Path of the data file, as passed by Crowller.
   * @returns The string that Crowller then writes to its data file.
   */
  analyze(html: string, fullPath: string): string
}
/**
 * Fetches a page, hands its HTML to the injected analyzer and writes the
 * analyzer's output to a JSON file on disk.
 *
 * The whole pipeline is started from the constructor, so creating an instance
 * is enough to trigger one crawl.
 */
class Crowller {
  /** Absolute path of the JSON file the analyzer output is written to. */
  fullPath = path.resolve(__dirname, '../data/data.json')

  /**
   * @param _url Address of the page to crawl.
   * @param _analyzer Turns the downloaded HTML into the text to be stored.
   */
  constructor(private _url: string, private _analyzer: Analyzer) {
    // A constructor cannot await, so the pipeline's rejection is handled here
    // instead of being left as an unhandled promise rejection.
    this.initSprderProcess().catch((err: unknown) => {
      console.error(err)
      process.exitCode = 1
    })
  }

  /**
   * Runs the crawl: download, analyze, save.
   * Rejects if any of the steps fails.
   */
  async initSprderProcess(): Promise<void> {
    const html = await this.getWebHtml(this._url)
    const resultStr = this._analyzer.analyze(html, this.fullPath)
    await this.storeData(resultStr)
  }

  /**
   * Downloads a page.
   *
   * @param url Address to request with HTTP GET.
   * @returns The response body as text.
   */
  async getWebHtml(url: string): Promise<string> {
    const response = await superagent.get(url)
    return response.text
  }

  /**
   * Writes `data` to `fullPath`, creating the parent directory when needed.
   *
   * @param data Text to write.
   * @returns A promise that resolves once the file is written and rejects on
   *          any file-system error.
   */
  async storeData(data: string): Promise<void> {
    await fs.promises.mkdir(path.dirname(this.fullPath), { recursive: true })
    await fs.promises.writeFile(this.fullPath, data)
    console.log('数据写入成功');
  }
}
/**
 * Entry point: obtains the shared DellAnalyzer and hands it, together with
 * the target address, to a new Crowller, whose constructor starts the crawl.
 */
function main(): void {
  const targetUrl = 'http://www.sextp.com/jpmt/'
  new Crowller(targetUrl, DellAnalyzer.getInstance())
}

main()
DellAnalyzer.ts (单例版本)
// html解析模块
import { load } from 'cheerio'
import fs from 'fs'
import { Analyzer } from './crowller'
/** One person card scraped from the listing page. */
interface PersonInfo {
  /** `src` attribute of the card's image. */
  img_url: string
  /** `alt` attribute of the card's image. */
  name: string
  /** `href` attribute of the link wrapping the image. */
  link: string
  /** Text that follows the first `:` inside the card's `.dianji` element. */
  store: string
}

/**
 * Accumulated crawl history as stored in the JSON data file: every key
 * (the millisecond timestamp of a run) maps to the people captured by that run.
 */
type Result = Record<string, PersonInfo[]>
/**
 * Analyzer for the crawled listing page: extracts the person cards from the
 * HTML and merges them into the history kept in the JSON data file.
 *
 * Only one instance exists; obtain it with `DellAnalyzer.getInstance()`.
 */
export default class DellAnalyzer implements Analyzer {
  /** The shared instance; stays undefined until `getInstance` is first called. */
  private static instance?: DellAnalyzer

  /** Private so that code outside the class cannot call `new DellAnalyzer()`. */
  private constructor() {}

  /**
   * Returns the shared instance, creating it on the first call.
   *
   * @returns The same DellAnalyzer object on every call.
   */
  public static getInstance(): DellAnalyzer {
    return (DellAnalyzer.instance ??= new DellAnalyzer())
  }

  /**
   * Extracts one record per `.rw_col` element of the page.
   *
   * @param text HTML source of the page.
   * @returns The extracted records; attributes or text that are missing
   *          become empty strings.
   */
  private parseHtml(text: string): PersonInfo[] {
    const personList: PersonInfo[] = []
    const $ = load(text)
    // `.each` rather than `.map`: the callback only collects into personList
    // and produces no mapped value.
    $('.rw_col').each((_index, element) => {
      const card = $(element)
      const image = card.find('div.rw_img > a > img')
      personList.push({
        img_url: image.attr('src') ?? '',
        name: image.attr('alt') ?? '',
        link: card.find('div.rw_img > a').attr('href') ?? '',
        // Keep only what follows the first ':' of the element's text.
        store: card.find('.dianji').text().split(':')[1] ?? ''
      })
    })
    return personList
  }

  /**
   * Merges freshly crawled records into the history stored at `fullPath`.
   *
   * @param newData Records produced by the current run.
   * @param fullPath Path of the JSON file holding earlier runs, if any.
   * @returns The JSON text of the history with `newData` added under the
   *          current millisecond timestamp.
   * @throws SyntaxError if the existing file holds non-empty invalid JSON.
   */
  private generateJsonContent(newData: PersonInfo[], fullPath: string): string {
    let result: Result = {}
    if (fs.existsSync(fullPath)) {
      const saved = fs.readFileSync(fullPath, 'utf8')
      // An empty file is not valid JSON; treat it as "no history yet"
      // instead of failing the run.
      if (saved.trim() !== '') {
        // NOTE(review): the parsed content is trusted to match Result; it is
        // not validated.
        result = JSON.parse(saved)
      }
    }
    result[Date.now()] = newData
    return JSON.stringify(result)
  }

  /**
   * Parses the page and returns the updated history as JSON text.
   *
   * @param html HTML source of the fetched page.
   * @param fullPath Path of the JSON file holding earlier runs, if any.
   * @returns JSON text containing the earlier runs plus the current one.
   */
  public analyze(html: string, fullPath: string): string {
    const personList = this.parseHtml(html)
    return this.generateJsonContent(personList, fullPath)
  }
}
(npm install -D ts-node 测试用)
package.json
{
"name": "20221206",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"dev:build": "tsc -w",
"dev:start": "nodemon node ./dist/crowller.js",
"dev":"tsc && concurrently npm:dev:*"
},
"nodemonConfig": {
"ignore":[
"data/*"
]
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"concurrently": "^7.6.0",
"nodemon": "^2.0.20",
"ts-node": "^10.9.1",
"typescript": "^4.9.3"
},
"dependencies": {
"@types/superagent": "^4.1.16",
"cheerio": "^1.0.0-rc.12",
"superagent": "^8.0.5"
}
}
提示:这里对文章进行总结:
以上就是本文的全部内容: 把页面解析逻辑从 Crowller 中抽离为实现 Analyzer 接口的 DellAnalyzer 之后, Crowller 只负责抓取页面和写入文件; 需要更换解析规则时, 只需传入另一个 Analyzer 实现即可, 而单例写法保证了 DellAnalyzer 只会被实例化一次。