scraperss.js 1.41 KB
Newer Older
Kim, Chaerin's avatar
?    
Kim, Chaerin committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import axios from "axios";
import cheerio from "cheerio";
import express from 'express';
import request from 'request-promise'
import jschardet from 'jschardet'
import iconv from 'iconv'
import fs from 'fs'
const Iconv = iconv.Iconv

// Express application instance; the route and the listener below are registered on it.
const app = express()

/**
 * GET / — starts a scrape of a fixed Google search-results URL and replies
 * with a greeting right away.
 *
 * The scrape runs in the background: the response does not wait for it, and
 * the extracted entries are only written to the console. Any failure in the
 * download / re-encode / parse chain is logged instead of surfacing as an
 * unhandled promise rejection.
 */
app.get('/', (req, res) => {
    const url = "https://www.google.com/search?q=%ED%95%9C%EB%9D%BC%EC%82%B0%20site%3Atistory.com&oq=tistory&aqs=chrome..69i57j0l4j69i60l3.1746j0j4&sourceid=chrome&ie=UTF-8&ved=2ahUKEwis_bSFz4buAhWVdXAKHU0tBaoQ2wF6BAgIEAE&ei=T1D1X-yZD5XrwQPN2pTQCg";

    /**
     * Detects the character encoding of a response body and re-encodes it
     * as a UTF-8 string.
     *
     * @param {Buffer|string} str - Raw response body.
     * @returns {string} The body as a UTF-8 string.
     */
    function anyToUtf8(str) {
        const { encoding } = jschardet.detect(str);
        console.log("source encoding = " + encoding);
        if (!encoding) {
            // Detection can fail (e.g. on an empty body); Iconv would throw on
            // a null source charset, so fall back to the default decoding.
            return str.toString();
        }
        // Named `converter` so it does not shadow the imported `iconv` module.
        const converter = new Iconv(encoding, "utf-8//translit//ignore");
        return converter.convert(str).toString();
    }

    // `encoding: null` makes request resolve with the raw bytes (a Buffer).
    // With the default setting the body is decoded as UTF-8 before charset
    // detection runs, which corrupts any non-UTF-8 page beyond repair.
    request({ url, encoding: null })
        .then(anyToUtf8)
        .then((html) => {
            const $ = cheerio.load(html, null, false);
            const places = [];
            // One entry per element matching `.kCrYT`.
            $('.kCrYT').each(function (i) {
                const block = $(this);
                places[i] = {
                    title: block.find('h3').text(),
                    link: block.find('a').attr('href'),
                    summary: block.text(),
                };
            });
            console.log(places);
        })
        .catch((err) => {
            // The HTTP response has already been sent, so logging is all
            // that can be done here.
            console.error("scrape failed:", err);
        });

    res.send("안녕");
});

// Port the HTTP server binds to.
const port = 3001;

// Start accepting connections and announce the port once the server is up.
app.listen(port, () => {
    console.log(`Server is listening on port ${port}`);
});