测试提交

This commit is contained in:
ZtRXR 2023-08-27 14:52:58 +08:00
parent b1846b6c14
commit 44fc4c2b64
16 changed files with 2752 additions and 132 deletions

136
.gitignore vendored
View File

@ -1,132 +1,6 @@
# ---> Node .idea/
# Logs .vscode/
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/ node_modules/
jspm_packages/ build/
tmp/
# Snowpack dependency directory (https://snowpack.dev/) temp/
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

View File

@ -1,2 +1,7 @@
# DataSpider2 # Awesome Project Built with TypeORM
Steps to run this project:
1. Run `npm i` command
2. Set up database settings inside `data-source.ts` file
3. Run `npm start` command

2397
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

24
package.json Normal file
View File

@ -0,0 +1,24 @@
{
"name": "typeorm-sample",
"version": "0.0.1",
"description": "Awesome project developed with TypeORM.",
"type": "commonjs",
"devDependencies": {
"@types/jsdom": "^21.1.2",
"@types/node": "^16.11.10",
"ts-node": "10.7.0",
"typescript": "4.5.2"
},
"dependencies": {
"axios": "^1.4.0",
"jsdom": "^22.1.0",
"mysql": "^2.14.1",
"reflect-metadata": "^0.1.13",
"sqlite3": "^5.1.6",
"typeorm": "0.3.17"
},
"scripts": {
"start": "ts-node src/index.ts",
"typeorm": "typeorm-ts-node-commonjs"
}
}

34
src/Spiders/Baidu.ts Normal file
View File

@ -0,0 +1,34 @@
import axios from 'axios';
import {JSDOM} from 'jsdom'
import { Hot } from '../entity/Hot';
import { AppDataSource } from '../data-source';
import { Web } from '../entity/Web';
import { insBaidu } from '../installer/Baidu';
import { SpiderMain } from './main';
/**
 * Public entry point of the Baidu spider.
 *
 * Simply forwards to this module's `main`; the returned promise settles
 * when `main` settles.
 */
export const runBaiduSpider = async (): Promise<void> => main()
/**
 * Scrapes the Baidu real-time hot board and passes the collected words to `SpiderMain`.
 *
 * Steps:
 * 1. Load the board page with jsdom and read every `.c-single-text-ellipsis` element.
 * 2. Look up the `Web` row named "Baidu", creating it through `insBaidu` when it is absent.
 * 3. Await `SpiderMain`, so this function only resolves once that call has finished
 *    and any failure is propagated to the caller instead of becoming an unhandled rejection.
 *
 * @throws whatever jsdom, TypeORM or `SpiderMain` reject with.
 */
const main = async () => {
  const { window } = await JSDOM.fromURL("https://top.baidu.com/board?tab=realtime")
  const ListHots: Hot[] = []
  try {
    window.document.querySelectorAll(".c-single-text-ellipsis").forEach(node => {
      const NewHot = new Hot()
      // textContent yields the decoded text; innerHTML would keep entities
      // such as `&amp;` and any nested markup inside the stored word.
      NewHot.word = (node.textContent ?? "").trim()
      ListHots.push(NewHot)
    })
  } finally {
    // Everything needed has been copied out of the DOM; release the window.
    window.close()
  }

  let fromWeb = await AppDataSource.manager.findOneBy(Web, {
    name: "Baidu"
  })
  if (fromWeb == null) {
    // First run: register the site, then re-read the persisted row.
    await insBaidu()
    fromWeb = await AppDataSource.manager.findOneByOrFail(Web, {
      name: "Baidu"
    })
  }

  await SpiderMain(fromWeb, ListHots)
}

35
src/Spiders/Bilibili.ts Normal file
View File

@ -0,0 +1,35 @@
import axios from "axios"
import { Hot } from "../entity/Hot"
import { AppDataSource } from "../data-source"
import { Web } from "../entity/Web"
import { Search } from "../entity/Search"
import { IsNull } from "typeorm"
import { Times } from "../entity/Times"
import { SpiderMain } from "./main"
import { insBilibili } from "../installer/Bilibili"
/**
 * Public entry point of the Bilibili spider.
 *
 * Simply forwards to this module's `main`; the returned promise settles
 * when `main` settles.
 */
export const runBiliSpider = async (): Promise<void> => main()
/**
 * Fetches Bilibili's trending search list and passes the keywords to `SpiderMain`.
 *
 * Steps:
 * 1. GET the search-square endpoint and read `data.trending.list` from the JSON body.
 * 2. Turn each entry's `keyword` into a `Hot`.
 * 3. Look up the `Web` row named "Bilibili", creating it through `insBilibili` when absent.
 * 4. Await `SpiderMain` with that row and the collected hots.
 *
 * @throws Error when the response body has no `data.trending.list` array;
 *         otherwise whatever axios, TypeORM or `SpiderMain` reject with.
 */
const main = async () => {
  const response = await axios.get("https://api.bilibili.com/x/web-interface/wbi/search/square?limit=50")

  // The payload is external input: check its shape before walking into it so
  // an error response produces a readable message instead of a TypeError.
  const getList: unknown = response.data?.data?.trending?.list
  if (!Array.isArray(getList)) {
    throw new Error("Bilibili trending response does not contain a data.trending.list array")
  }

  const crtHots: Hot[] = getList.map(entry => {
    const newHot = new Hot()
    newHot.word = entry.keyword
    return newHot
  })

  let fromWeb = await AppDataSource.manager.findOneBy(Web, {
    name: "Bilibili"
  })
  if (fromWeb == null) {
    // First run: register the site, then re-read the persisted row.
    await insBilibili()
    fromWeb = await AppDataSource.manager.findOneByOrFail(Web, {
      name: "Bilibili"
    })
  }

  await SpiderMain(fromWeb, crtHots)
}

42
src/Spiders/main.ts Normal file
View File

@ -0,0 +1,42 @@
import { AppDataSource } from "../data-source";
import { Hot } from "../entity/Hot";
import { Search } from "../entity/Search";
import { Times } from "../entity/Times";
import { Web } from "../entity/Web";
/**
 * Persists one crawl result.
 *
 * Creates a `Search` row stamped with the current time for `fromWeb`, then, for every
 * entry of `HotList`:
 *  - links the hot to that search,
 *  - finds the `Times` row matching the hot's word and `fromWeb` (or builds a new one
 *    starting at 0),
 *  - increments its counter, refreshes `lastTime`, and saves it,
 *  - links the hot to that `Times` row and saves the hot.
 *
 * All hots are processed concurrently via `Promise.all`.
 * NOTE(review): because lookups run concurrently, two hots with the same word inside one
 * `HotList` may each miss the lookup and insert their own `Times` row — confirm whether
 * inputs can contain duplicates.
 *
 * @param fromWeb  site the hot words were collected from
 * @param HotList  hots to store; each element is mutated (`fromSearch`, `fromTimes`) and saved
 * @returns a promise that resolves once every row has been saved
 */
export const SpiderMain = async (fromWeb: Web, HotList: Hot[]) => {
  const nowDate = new Date()

  // Create the search record for this crawl.
  let crtSearch = new Search()
  crtSearch.date = nowDate
  crtSearch.fromWeb = fromWeb
  crtSearch = await AppDataSource.manager.save(crtSearch)

  await Promise.all(HotList.map(async d => {
    d.fromSearch = crtSearch

    // Fetch the historical counter for this word on this site.
    let fromTimes = await AppDataSource.manager.findOneBy(Times, {
      word: d.word,
      fromWeb: fromWeb
    })

    // None yet: start a fresh counter. It is persisted once, below, together
    // with the increment, instead of being inserted at 0 and updated right after.
    if (fromTimes == null) {
      fromTimes = new Times()
      fromTimes.times = 0
      fromTimes.word = d.word
      fromTimes.fromWeb = fromWeb
    }

    // Count this occurrence and refresh the last-seen date.
    fromTimes.times++
    fromTimes.lastTime = nowDate
    fromTimes = await AppDataSource.manager.save(fromTimes)

    d.fromTimes = fromTimes
    await AppDataSource.manager.save(d)
  }))
}

35
src/data-source.ts Normal file
View File

@ -0,0 +1,35 @@
import "reflect-metadata"
import { DataSource } from "typeorm"
import { Search } from "./entity/Search"
import { Hot } from "./entity/Hot"
import { Web } from "./entity/Web"
import { Times } from "./entity/Times"
import { readFileSync } from "fs"
import path = require("path")
// import { main } from "."
/**
 * Shared TypeORM data source for the whole application (MySQL).
 *
 * Connection settings can be overridden through the environment variables
 * `DB_HOST`, `DB_PORT`, `DB_USERNAME`, `DB_PASSWORD` and `DB_DATABASE`; when a
 * variable is unset the previous hard-coded value is used, so existing setups
 * keep working unchanged.
 *
 * SECURITY NOTE(review): the fallback password below is committed to version
 * control. It should be rotated and the literal removed once `DB_PASSWORD` is
 * provided everywhere this runs.
 *
 * `synchronize: true` makes TypeORM sync the schema from the entities on
 * initialization.
 */
export const AppDataSource = new DataSource({
  type: "mysql",
  host: process.env.DB_HOST ?? "localhost",
  // `||` so that an unset, empty or non-numeric DB_PORT falls back to the default.
  port: Number(process.env.DB_PORT) || 3306,
  username: process.env.DB_USERNAME ?? "root",
  password: process.env.DB_PASSWORD ?? "ZengtudorRXR2008",
  database: process.env.DB_DATABASE ?? "hot",
  synchronize: true,
  logging: false,
  entities: [Search, Hot, Web, Times],
  migrations: [],
  subscribers: [],
})
// export const AppDataSource = new DataSource({
// type: "sqlite",
// database: "./hot.sqlite",
// synchronize: true,
// logging: false,
// entities: [Search,Hot,Web,Times],
// migrations: [],
// subscribers: [],
// })

15
src/entity/Hot.ts Normal file
View File

@ -0,0 +1,15 @@
import { Column, Entity, ManyToOne, PrimaryGeneratedColumn } from "typeorm";
import { Search } from "./Search";
import { Times } from "./Times";
/**
 * One hot-search word captured during a single crawl.
 */
@Entity()
export class Hot {
  /** Auto-generated primary key. */
  @PrimaryGeneratedColumn()
  id: number

  /** The hot-search word/keyword text. */
  @Column()
  word: string

  /** The crawl (`Search`) in which this word was captured. */
  @ManyToOne(() => Search, (search) => search.Hots)
  fromSearch: Search

  /**
   * The `Times` counter row this word is linked to.
   * Explicitly typed (it was an implicit `any` before) so assignments are type-checked,
   * matching how the other relations in the entities are declared.
   */
  @ManyToOne(() => Times, (time) => time.Hots)
  fromTimes: Times
}

15
src/entity/Search.ts Normal file
View File

@ -0,0 +1,15 @@
import { Column, Entity, ManyToOne, OneToMany, PrimaryGeneratedColumn } from "typeorm";
import { Hot } from "./Hot";
import { Web } from "./Web";
/**
 * One crawl of a site: groups the hot words captured at a given moment.
 */
@Entity()
export class Search {
  /** Auto-generated primary key. */
  @PrimaryGeneratedColumn()
  id: number

  /** Hot words captured by this crawl. */
  @OneToMany(() => Hot, (hot) => hot.fromSearch)
  Hots: Hot[]

  /**
   * The site this crawl was run against.
   * Explicitly typed (it was an implicit `any` before) so assignments are type-checked.
   */
  @ManyToOne(() => Web, (web) => web.Searches)
  fromWeb: Web

  /** When the crawl was recorded. */
  @Column()
  date: Date
}

19
src/entity/Times.ts Normal file
View File

@ -0,0 +1,19 @@
import { Column, Entity, ManyToOne, OneToMany, OneToOne, PrimaryGeneratedColumn } from "typeorm";
import { Hot } from "./Hot";
import { Web } from "./Web";
/**
 * Occurrence counter for a word on a given site.
 */
@Entity()
export class Times {
  /** Auto-generated primary key. */
  @PrimaryGeneratedColumn()
  id: number

  /** Hot entries linked to this counter. */
  @OneToMany(() => Hot, (hotEntry) => hotEntry.fromTimes)
  Hots: Hot[]

  /** Number of occurrences counted so far. */
  @Column()
  times: number

  /** The word being counted. */
  @Column()
  word: string

  /** Site the word was observed on. */
  @ManyToOne(() => Web, (site) => site.times)
  fromWeb: Web

  /** Date of the most recent update of this counter. */
  @Column()
  lastTime: Date
}

19
src/entity/Web.ts Normal file
View File

@ -0,0 +1,19 @@
import { Column, Entity, OneToMany, PrimaryGeneratedColumn } from "typeorm";
import { Search } from "./Search";
import { Times } from "./Times";
/**
 * A site that hot words are collected from.
 */
@Entity()
export class Web {
  /** Auto-generated primary key. */
  @PrimaryGeneratedColumn()
  id: number

  /** Crawls recorded for this site. */
  @OneToMany(() => Search, (crawl) => crawl.fromWeb)
  Searches: Search[]

  /** Site name used for lookups (e.g. "Baidu", "Bilibili"). */
  @Column()
  name: string

  /** URL of the site itself. */
  @Column()
  fromUrl: string

  /** Search URL of the site — presumably a prefix the keyword is appended to; confirm with consumers. */
  @Column()
  searchUrl: string

  /** Word counters kept for this site. */
  @OneToMany(() => Times, (counter) => counter.fromWeb)
  times: Times[]
}

54
src/index.ts Normal file
View File

@ -0,0 +1,54 @@
import { readFileSync } from "fs";
import { runBiliSpider } from "./Spiders/Bilibili";
import { AppDataSource } from "./data-source"
import { Web } from "./entity/Web"
import { runBaiduSpider } from "./Spiders/Baidu";
/** Number of `main` runs completed by this process. */
let runTimes = 0;

/**
 * Runs one crawl cycle.
 *
 * Initializes the shared data source, runs the Bilibili and Baidu spiders
 * concurrently, schedules the data source to be closed 10 seconds later, and
 * logs how long the cycle took.
 *
 * The close is scheduled from a `finally` block so the connection is released
 * even when a spider rejects; the rejection itself still propagates to the caller.
 */
const main = async () => {
  const startTime = new Date()
  await AppDataSource.initialize()
  try {
    const tasks: Promise<void>[] = [runBiliSpider(), runBaiduSpider()]
    await Promise.all(tasks)
  } finally {
    // The close is deferred by 10 s — presumably to let database writes that are
    // still in flight finish; confirm before shortening or removing the delay.
    setTimeout(() => {
      // Nothing to close if something else already shut the data source down.
      if (!AppDataSource.isInitialized) {
        return
      }
      AppDataSource.destroy()
        .then(() => console.log("已关闭数据库..."))
        // Report instead of leaving an unhandled rejection inside the timer callback.
        .catch(e => console.error("failed to close the data source", e))
    }, 1000 * 10)
    console.log("延时数据库关闭....", "10s")
  }

  const endTime = new Date()
  // Use the raw difference: Date#getMilliseconds()/getSeconds() only return the
  // 0-999 / 0-59 components of a timestamp, not the total duration.
  const elapsedMs = endTime.getTime() - startTime.getTime()
  runTimes++;
  console.log("时间:", endTime.toLocaleString(), "任务完成用时:", elapsedMs, "毫秒 || ", elapsedMs / 1000, "秒", `${runTimes}次完成任务`)
}
// Kick off the first run at startup and report whether it succeeded.
// A failure is only logged; the process is not terminated here.
void (async () => {
  try {
    await main()
    console.log("first test run is OK!!!")
  } catch (e) {
    console.error("first run error !!!!!!!!!!!!!!!", e)
  }
})()
// setInterval(()=>{
// main().then(()=>{
// console.log("run is OK!!!")
// }).catch(e=>{
// console.error("run error !!!!!!!!!!!!!!!",e)
// })
// },1000*60*15)

10
src/installer/Baidu.ts Normal file
View File

@ -0,0 +1,10 @@
import { AppDataSource } from "../data-source"
import { Web } from "../entity/Web"
/**
 * Inserts the `Web` row describing Baidu (name, site URL and search URL)
 * through the shared data source's entity manager.
 */
export const insBaidu = async () => {
  const baiduSite = Object.assign(new Web(), {
    fromUrl: "https://www.baidu.com/",
    name: "Baidu",
    searchUrl: "https://www.baidu.com/s?wd=",
  })
  await AppDataSource.manager.save(baiduSite)
}

27
src/installer/Bilibili.ts Normal file
View File

@ -0,0 +1,27 @@
import { AppDataSource } from "../data-source"
import { Web } from "../entity/Web"
/**
 * Inserts the `Web` row describing Bilibili and logs all `Web` rows afterwards.
 *
 * Works both standalone and from inside a running crawl: the shared data source
 * is initialized (and destroyed afterwards) only when it was not already
 * initialized on entry. When the caller already holds an open data source it is
 * reused and left open, so the caller's later queries keep working.
 */
export const insBilibili = async () => {
  // Only manage the connection's lifecycle if this call is the one that opened it.
  const ownsConnection = !AppDataSource.isInitialized
  if (ownsConnection) {
    await AppDataSource.initialize()
  }
  try {
    const BiliWeb = new Web()
    BiliWeb.name = "Bilibili"
    BiliWeb.fromUrl = "https://www.bilibili.com/"
    BiliWeb.searchUrl = "https://search.bilibili.com/all?keyword="
    await AppDataSource.manager.save(BiliWeb)

    const findBili = await AppDataSource.manager.find(Web)
    console.log("Created", findBili)
  } finally {
    // Release the connection even if the insert or the read-back failed.
    if (ownsConnection) {
      await AppDataSource.destroy()
    }
  }
}
// export const insBilibili = async ()=>{
// const getBili = await AppDataSource.manager.findOne(Web,{
// where:{
// name:"Bilibili"
// }
// })
// console.log(getBili)
// getBili.fromUrl="https://www.bilibili.com/"
// getBili.searchUrl="https://search.bilibili.com/all?keyword="
// const nextBili = await AppDataSource.manager.save(getBili)
// console.log(nextBili)
// }

15
tsconfig.json Normal file
View File

@ -0,0 +1,15 @@
{
"compilerOptions": {
"lib": [
"es5",
"es6"
],
"target": "es5",
"module": "commonjs",
"moduleResolution": "node",
"outDir": "./build",
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"sourceMap": true
}
}