测试提交
This commit is contained in:
parent
b1846b6c14
commit
44fc4c2b64
136
.gitignore
vendored
136
.gitignore
vendored
@ -1,132 +1,6 @@
|
|||||||
# ---> Node
|
.idea/
|
||||||
# Logs
|
.vscode/
|
||||||
logs
|
|
||||||
*.log
|
|
||||||
npm-debug.log*
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
.pnpm-debug.log*
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
node_modules/
|
node_modules/
|
||||||
jspm_packages/
|
build/
|
||||||
|
tmp/
|
||||||
# Snowpack dependency directory (https://snowpack.dev/)
|
temp/
|
||||||
web_modules/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
.stylelintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variable files
|
|
||||||
.env
|
|
||||||
.env.development.local
|
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
.cache
|
|
||||||
.parcel-cache
|
|
||||||
|
|
||||||
# Next.js build output
|
|
||||||
.next
|
|
||||||
out
|
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
.cache/
|
|
||||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
|
||||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
|
||||||
# public
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# vuepress v2.x temp and cache directory
|
|
||||||
.temp
|
|
||||||
.cache
|
|
||||||
|
|
||||||
# Docusaurus cache and generated files
|
|
||||||
.docusaurus
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# yarn v2
|
|
||||||
.yarn/cache
|
|
||||||
.yarn/unplugged
|
|
||||||
.yarn/build-state.yml
|
|
||||||
.yarn/install-state.gz
|
|
||||||
.pnp.*
|
|
||||||
|
|
@ -1,2 +1,7 @@
|
|||||||
# DataSpider2
|
# Awesome Project Built with TypeORM
|
||||||
|
|
||||||
|
Steps to run this project:
|
||||||
|
|
||||||
|
1. Run `npm i` command
|
||||||
|
2. Set up the database settings inside the `data-source.ts` file
|
||||||
|
3. Run `npm start` command
|
||||||
|
2397
package-lock.json
generated
Normal file
2397
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
24
package.json
Normal file
24
package.json
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"name": "typeorm-sample",
|
||||||
|
"version": "0.0.1",
|
||||||
|
"description": "Awesome project developed with TypeORM.",
|
||||||
|
"type": "commonjs",
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/jsdom": "^21.1.2",
|
||||||
|
"@types/node": "^16.11.10",
|
||||||
|
"ts-node": "10.7.0",
|
||||||
|
"typescript": "4.5.2"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"axios": "^1.4.0",
|
||||||
|
"jsdom": "^22.1.0",
|
||||||
|
"mysql": "^2.14.1",
|
||||||
|
"reflect-metadata": "^0.1.13",
|
||||||
|
"sqlite3": "^5.1.6",
|
||||||
|
"typeorm": "0.3.17"
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"start": "ts-node src/index.ts",
|
||||||
|
"typeorm": "typeorm-ts-node-commonjs"
|
||||||
|
}
|
||||||
|
}
|
34
src/Spiders/Baidu.ts
Normal file
34
src/Spiders/Baidu.ts
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
import {JSDOM} from 'jsdom'
|
||||||
|
import { Hot } from '../entity/Hot';
|
||||||
|
import { AppDataSource } from '../data-source';
|
||||||
|
import { Web } from '../entity/Web';
|
||||||
|
import { insBaidu } from '../installer/Baidu';
|
||||||
|
import { SpiderMain } from './main';
|
||||||
|
|
||||||
|
/**
 * Public entry point of the Baidu spider.
 *
 * Delegates to this module's `main` and settles when that run settles.
 */
export const runBaiduSpider = async (): Promise<void> => {
    return main()
}
|
||||||
|
|
||||||
|
/**
 * Scrapes the Baidu real-time hot-search board and stores the result.
 *
 * Steps:
 *  1. Load the board page with JSDOM and turn every element matching
 *     `.c-single-text-ellipsis` into a `Hot` entity.
 *  2. Look up the `Web` row named "Baidu", creating it through `insBaidu()`
 *     when it does not exist yet.
 *  3. Pass the site and the collected entries to `SpiderMain` for storage.
 *
 * The returned promise settles only after `SpiderMain` has finished, so an
 * awaiting caller sees storage errors and can rely on the writes being done.
 */
const main = async (): Promise<void> => {
    const { window } = await JSDOM.fromURL("https://top.baidu.com/board?tab=realtime")
    const titleNodes = window.document.querySelectorAll(".c-single-text-ellipsis")

    const hotList: Hot[] = []
    titleNodes.forEach(node => {
        const hot = new Hot()
        // textContent yields the plain text; innerHTML would store HTML
        // entities (e.g. "&amp;") and nested markup/comments inside the word.
        hot.word = (node.textContent ?? "").trim()
        hotList.push(hot)
    })

    let fromWeb = await AppDataSource.manager.findOneBy(Web, { name: "Baidu" })
    if (fromWeb == null) {
        // The site row is missing: create it, then read it back.
        await insBaidu()
        fromWeb = await AppDataSource.manager.findOneByOrFail(Web, { name: "Baidu" })
    }

    // Must be awaited: otherwise this function resolves while rows are still
    // being written and a storage failure becomes an unhandled rejection.
    await SpiderMain(fromWeb, hotList)
}
|
35
src/Spiders/Bilibili.ts
Normal file
35
src/Spiders/Bilibili.ts
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
import axios from "axios"
|
||||||
|
import { Hot } from "../entity/Hot"
|
||||||
|
import { AppDataSource } from "../data-source"
|
||||||
|
import { Web } from "../entity/Web"
|
||||||
|
import { Search } from "../entity/Search"
|
||||||
|
import { IsNull } from "typeorm"
|
||||||
|
import { Times } from "../entity/Times"
|
||||||
|
import { SpiderMain } from "./main"
|
||||||
|
import { insBilibili } from "../installer/Bilibili"
|
||||||
|
|
||||||
|
/**
 * Public entry point of the Bilibili spider.
 *
 * Delegates to this module's `main` and settles when that run settles.
 */
export const runBiliSpider = async (): Promise<void> => {
    return main()
}
|
||||||
|
|
||||||
|
/**
 * Fetches Bilibili's trending search keywords and stores the result.
 *
 * Steps:
 *  1. Request the search-square endpoint and read `data.data.trending.list`
 *     from the response body; each item's `keyword` becomes a `Hot` entity.
 *  2. Look up the `Web` row named "Bilibili", creating it through
 *     `insBilibili()` when it does not exist yet.
 *  3. Pass the site and the collected entries to `SpiderMain` for storage.
 *
 * @throws Error when the response body does not contain the expected
 *         `data.trending.list` array (for example an API error payload).
 */
const main = async (): Promise<void> => {
    const response = await axios.get("https://api.bilibili.com/x/web-interface/wbi/search/square?limit=50")

    // Validate the shape before use so an unexpected payload fails with a
    // clear message instead of "cannot read properties of undefined".
    const trendingList: unknown = response.data?.data?.trending?.list
    if (!Array.isArray(trendingList)) {
        throw new Error("Bilibili trending response does not contain a data.trending.list array")
    }

    const crtHots: Hot[] = trendingList.map(entry => {
        const hot = new Hot()
        hot.word = entry.keyword
        return hot
    })

    let fromWeb = await AppDataSource.manager.findOneBy(Web, { name: "Bilibili" })
    if (fromWeb == null) {
        // The site row is missing: create it, then read it back.
        await insBilibili()
        fromWeb = await AppDataSource.manager.findOneByOrFail(Web, { name: "Bilibili" })
    }

    await SpiderMain(fromWeb, crtHots)
}
|
42
src/Spiders/main.ts
Normal file
42
src/Spiders/main.ts
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import { AppDataSource } from "../data-source";
|
||||||
|
import { Hot } from "../entity/Hot";
|
||||||
|
import { Search } from "../entity/Search";
|
||||||
|
import { Times } from "../entity/Times";
|
||||||
|
import { Web } from "../entity/Web";
|
||||||
|
|
||||||
|
/**
 * Stores one crawl of a site.
 *
 * A `Search` row stamped with the current time is created first. Then, for
 * every entry in `HotList`, the `Times` counter for the same word and site is
 * looked up (or started at 0 when none exists), incremented, stamped with the
 * crawl time and saved; finally the entry itself is saved, linked to both the
 * search and the counter.
 *
 * Entries are handled one after another on purpose. Running the
 * find-or-create step for all entries in parallel lets two entries with the
 * same word both miss the lookup, which creates duplicate `Times` rows and
 * loses an increment.
 *
 * @param fromWeb Site the entries were collected from.
 * @param HotList Entries to store; `fromSearch` and `fromTimes` are assigned here.
 * @returns Resolves once every row is saved; rejects with the first storage error.
 */
export const SpiderMain = async (fromWeb: Web, HotList: Hot[]): Promise<void> => {
    const nowDate = new Date()

    // Create the search (crawl) record first so every entry can reference it.
    const pendingSearch = new Search()
    pendingSearch.date = nowDate
    pendingSearch.fromWeb = fromWeb
    const crtSearch = await AppDataSource.manager.save(pendingSearch)

    for (const hot of HotList) {
        hot.fromSearch = crtSearch

        // Fetch the historical counter for this word on this site.
        let counter = await AppDataSource.manager.findOneBy(Times, {
            word: hot.word,
            fromWeb: fromWeb,
        })
        if (counter == null) {
            // No history yet: start a new counter; it is inserted by the save below.
            counter = new Times()
            counter.times = 0
            counter.word = hot.word
            counter.fromWeb = fromWeb
        }

        // Count this appearance and record when it was last seen.
        counter.times++
        counter.lastTime = nowDate
        hot.fromTimes = await AppDataSource.manager.save(counter)

        await AppDataSource.manager.save(hot)
    }
}
|
35
src/data-source.ts
Normal file
35
src/data-source.ts
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
import "reflect-metadata"
|
||||||
|
import { DataSource } from "typeorm"
|
||||||
|
import { Search } from "./entity/Search"
|
||||||
|
import { Hot } from "./entity/Hot"
|
||||||
|
import { Web } from "./entity/Web"
|
||||||
|
import { Times } from "./entity/Times"
|
||||||
|
import { readFileSync } from "fs"
|
||||||
|
import path = require("path")
|
||||||
|
// import { main } from "."
|
||||||
|
|
||||||
|
/**
 * Shared TypeORM data source for the MySQL database that stores crawl results.
 *
 * Every connection setting can be overridden through an environment variable
 * (DB_HOST, DB_PORT, DB_USERNAME, DB_PASSWORD, DB_DATABASE). When a variable
 * is not set, the value that used to be hard-coded is used, so existing
 * setups keep working unchanged.
 *
 * NOTE(review): the fallback password is a credential committed to the
 * repository. Rotate it, supply the new one through DB_PASSWORD, and then
 * remove the fallback.
 *
 * NOTE(review): `synchronize: true` lets TypeORM alter the schema to match
 * the entities on initialization; confirm this is wanted outside development.
 */
export const AppDataSource = new DataSource({
    type: "mysql",
    host: process.env.DB_HOST ?? "localhost",
    // Falls back to the default port when DB_PORT is unset or not a number.
    port: Number(process.env.DB_PORT) || 3306,
    username: process.env.DB_USERNAME ?? "root",
    password: process.env.DB_PASSWORD ?? "ZengtudorRXR2008",
    database: process.env.DB_DATABASE ?? "hot",
    synchronize: true,
    logging: false,
    entities: [Search, Hot, Web, Times],
    migrations: [],
    subscribers: [],
})
|
||||||
|
|
||||||
|
|
||||||
|
// export const AppDataSource = new DataSource({
|
||||||
|
// type: "sqlite",
|
||||||
|
// database: "./hot.sqlite",
|
||||||
|
// synchronize: true,
|
||||||
|
// logging: false,
|
||||||
|
// entities: [Search,Hot,Web,Times],
|
||||||
|
// migrations: [],
|
||||||
|
// subscribers: [],
|
||||||
|
// })
|
15
src/entity/Hot.ts
Normal file
15
src/entity/Hot.ts
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import { Column, Entity, ManyToOne, PrimaryGeneratedColumn } from "typeorm";
|
||||||
|
import { Search } from "./Search";
|
||||||
|
import { Times } from "./Times";
|
||||||
|
|
||||||
|
/**
 * One hot-search entry captured during a single crawl.
 */
@Entity()
export class Hot {
    /** Auto-generated primary key. */
    @PrimaryGeneratedColumn()
    id: number

    /** The trending word or phrase. */
    @Column()
    word: string

    /** Crawl in which this entry was captured (inverse side: `Search.Hots`). */
    @ManyToOne(() => Search, (search) => search.Hots)
    fromSearch: Search

    /**
     * Appearance counter for this entry's word (inverse side: `Times.Hots`).
     * Typed explicitly; it was previously an implicit `any`.
     */
    @ManyToOne(() => Times, (times) => times.Hots)
    fromTimes: Times
}
|
15
src/entity/Search.ts
Normal file
15
src/entity/Search.ts
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import { Column, Entity, ManyToOne, OneToMany, PrimaryGeneratedColumn } from "typeorm";
|
||||||
|
import { Hot } from "./Hot";
|
||||||
|
import { Web } from "./Web";
|
||||||
|
|
||||||
|
/**
 * One crawl of a site: groups the hot entries captured at the same moment.
 */
@Entity()
export class Search {
    /** Auto-generated primary key. */
    @PrimaryGeneratedColumn()
    id: number

    /** Entries captured by this crawl (owning side: `Hot.fromSearch`). */
    @OneToMany(() => Hot, (hot) => hot.fromSearch)
    Hots: Hot[]

    /**
     * Site that was crawled (inverse side: `Web.Searches`).
     * Typed explicitly; it was previously an implicit `any`.
     */
    @ManyToOne(() => Web, (web) => web.Searches)
    fromWeb: Web

    /** When the crawl was stored. */
    @Column()
    date: Date
}
|
19
src/entity/Times.ts
Normal file
19
src/entity/Times.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import { Column, Entity, ManyToOne, OneToMany, OneToOne, PrimaryGeneratedColumn } from "typeorm";
|
||||||
|
import { Hot } from "./Hot";
|
||||||
|
import { Web } from "./Web";
|
||||||
|
|
||||||
|
/**
 * Running appearance counter for one word on one site.
 */
@Entity()
export class Times {
    /** Auto-generated primary key. */
    @PrimaryGeneratedColumn()
    id: number

    /** Every captured entry that counted towards this counter (owning side: `Hot.fromTimes`). */
    @OneToMany(() => Hot, (entry) => entry.fromTimes)
    Hots: Hot[]

    /** Number of appearances counted so far. */
    @Column()
    times: number

    /** The word being counted. */
    @Column()
    word: string

    /** Site the counter belongs to (inverse side: `Web.times`). */
    @ManyToOne(() => Web, (site) => site.times)
    fromWeb: Web

    /** Time of the most recent counted appearance. */
    @Column()
    lastTime: Date
}
|
19
src/entity/Web.ts
Normal file
19
src/entity/Web.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import { Column, Entity, OneToMany, PrimaryGeneratedColumn } from "typeorm";
|
||||||
|
import { Search } from "./Search";
|
||||||
|
import { Times } from "./Times";
|
||||||
|
|
||||||
|
/**
 * A crawled site, together with the URLs associated with it.
 */
@Entity()
export class Web {
    /** Auto-generated primary key. */
    @PrimaryGeneratedColumn()
    id: number

    /** Crawls performed against this site (owning side: `Search.fromWeb`). */
    @OneToMany(() => Search, (crawl) => crawl.fromWeb)
    Searches: Search[]

    /** Site name; the spiders look the row up by this value. */
    @Column()
    name: string

    /** Home page URL of the site. */
    @Column()
    fromUrl: string

    /** URL prefix of the site's search page. */
    @Column()
    searchUrl: string

    /** Per-word appearance counters of this site (owning side: `Times.fromWeb`). */
    @OneToMany(() => Times, (counter) => counter.fromWeb)
    times: Times[]
}
|
54
src/index.ts
Normal file
54
src/index.ts
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import { readFileSync } from "fs";
|
||||||
|
import { runBiliSpider } from "./Spiders/Bilibili";
|
||||||
|
import { AppDataSource } from "./data-source"
|
||||||
|
import { Web } from "./entity/Web"
|
||||||
|
import { runBaiduSpider } from "./Spiders/Baidu";
|
||||||
|
|
||||||
|
/** Number of completed runs of `main` in this process; shown in the summary log line. */
let runTimes = 0

/** Delay between the spiders settling and the database connection being closed. */
const DB_CLOSE_DELAY_MS = 1000 * 10

/**
 * Runs one full crawl: opens the database connection, runs the Bilibili and
 * Baidu spiders concurrently, schedules the connection to be closed, and logs
 * how long the run took.
 *
 * The connection close is scheduled even when a spider fails, so a failed run
 * no longer leaves the connection open. The spider error is still propagated
 * to the caller.
 */
const main = async (): Promise<void> => {
    const startTime = new Date()
    await AppDataSource.initialize()

    try {
        await Promise.all([runBiliSpider(), runBaiduSpider()])
    } finally {
        // NOTE(review): the close is delayed rather than immediate, presumably
        // as a grace period for writes still in flight after the spiders
        // resolve. Confirm before shortening or removing the delay.
        setTimeout(() => {
            AppDataSource.destroy()
                .then(() => console.log("已关闭数据库..."))
                // Report a failed close instead of leaving an unhandled rejection.
                .catch(e => console.error("failed to close the database connection", e))
        }, DB_CLOSE_DELAY_MS)
        console.log("延时数据库关闭....", "10s")
    }

    const endTime = new Date()
    // Use the raw difference: Date#getMilliseconds / Date#getSeconds return
    // only the remainder within the second / minute, not the total elapsed time.
    const elapsedMs = endTime.getTime() - startTime.getTime()
    runTimes++
    console.log("时间:", endTime.toLocaleString(), "任务完成用时:", elapsedMs, "毫秒 || ", Math.floor(elapsedMs / 1000), "秒", `第${runTimes}次完成任务`)
}
|
||||||
|
|
||||||
|
// Start a single run when the module is loaded and report the outcome on the
// console. A failure is only logged; the process is not terminated here.
const reportSuccess = (): void => {
    console.log("first test run is OK!!!")
}

const reportFailure = (e: unknown): void => {
    console.error("first run error !!!!!!!!!!!!!!!", e)
}

main().then(reportSuccess).catch(reportFailure)
|
||||||
|
|
||||||
|
// setInterval(()=>{
|
||||||
|
// main().then(()=>{
|
||||||
|
// console.log("run is OK!!!")
|
||||||
|
// }).catch(e=>{
|
||||||
|
// console.error("run error !!!!!!!!!!!!!!!",e)
|
||||||
|
// })
|
||||||
|
// },1000*60*15)
|
10
src/installer/Baidu.ts
Normal file
10
src/installer/Baidu.ts
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import { AppDataSource } from "../data-source"
|
||||||
|
import { Web } from "../entity/Web"
|
||||||
|
|
||||||
|
/**
 * Inserts the `Web` row that describes Baidu: its name, home page URL and
 * search URL prefix.
 *
 * Uses the shared `AppDataSource` as-is; the connection is neither opened nor
 * closed here.
 */
export const insBaidu = async (): Promise<void> => {
    const baiduSite = Object.assign(new Web(), {
        fromUrl: "https://www.baidu.com/",
        name: "Baidu",
        searchUrl: "https://www.baidu.com/s?wd=",
    })
    await AppDataSource.manager.save(baiduSite)
}
|
27
src/installer/Bilibili.ts
Normal file
27
src/installer/Bilibili.ts
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import { AppDataSource } from "../data-source"
|
||||||
|
import { Web } from "../entity/Web"
|
||||||
|
|
||||||
|
/**
 * Inserts the `Web` row that describes Bilibili (name, home page URL and
 * search URL prefix) and logs all `Web` rows afterwards.
 *
 * Can be called both standalone and from a running spider:
 *  - when `AppDataSource` is not initialized yet, the connection is opened
 *    here and closed again before returning;
 *  - when it is already initialized, the existing connection is reused and
 *    left open. Initializing it a second time would fail, and destroying it
 *    would close the connection under the caller.
 */
export const insBilibili = async (): Promise<void> => {
    // Only manage the connection's lifetime when nobody else has opened it.
    const ownsConnection = !AppDataSource.isInitialized
    if (ownsConnection) {
        await AppDataSource.initialize()
    }

    try {
        const biliSite = new Web()
        biliSite.name = "Bilibili"
        biliSite.fromUrl = "https://www.bilibili.com/"
        biliSite.searchUrl = "https://search.bilibili.com/all?keyword="
        await AppDataSource.manager.save(biliSite)

        const allSites = await AppDataSource.manager.find(Web)
        console.log("Created", allSites)
    } finally {
        if (ownsConnection) {
            await AppDataSource.destroy()
        }
    }
}
|
||||||
|
|
||||||
|
// export const insBilibili = async ()=>{
|
||||||
|
// const getBili = await AppDataSource.manager.findOne(Web,{
|
||||||
|
// where:{
|
||||||
|
// name:"Bilibili"
|
||||||
|
// }
|
||||||
|
// })
|
||||||
|
// console.log(getBili)
|
||||||
|
// getBili.fromUrl="https://www.bilibili.com/"
|
||||||
|
// getBili.searchUrl="https://search.bilibili.com/all?keyword="
|
||||||
|
// const nextBili = await AppDataSource.manager.save(getBili)
|
||||||
|
// console.log(nextBili)
|
||||||
|
// }
|
15
tsconfig.json
Normal file
15
tsconfig.json
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"lib": [
|
||||||
|
"es5",
|
||||||
|
"es6"
|
||||||
|
],
|
||||||
|
"target": "es5",
|
||||||
|
"module": "commonjs",
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"outDir": "./build",
|
||||||
|
"emitDecoratorMetadata": true,
|
||||||
|
"experimentalDecorators": true,
|
||||||
|
"sourceMap": true
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user