Skip to content
This repository was archived by the owner on Mar 11, 2025. It is now read-only.

Commit

Permalink
Fix some issue.
Browse files: browse the repository at this point in the history
  • Loading branch information
solarhell committed Jul 20, 2019
1 parent 920cfe6 commit 3ecf7c8
Show file tree
Hide file tree
Showing 15 changed files with 1,887 additions and 238 deletions.
34 changes: 5 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,31 @@ package main

import (
"fmt"
"github.com/solarhell/ZhihuZhuanlanCrawler"
"log"
"net/http"
"os"
"time"
Zhihu "github.com/solarhell/ZhihuZhuanlanCrawler"
)

func main() {
log.SetFlags(log.LstdFlags | log.Lshortfile)

const debug = true

c := ZhihuZhuanlanCrawler.NewClient(&http.Client{
Timeout: 30 * time.Second,
Transport: &ZhihuZhuanlanCrawler.DebugRequestTransport{
RequestHeader: debug,
RequestBody: debug,
ResponseHeader: debug,
ResponseBody: debug,
Transport: &http.Transport{
IdleConnTimeout: 30 * time.Second,
},
},
})

const columnName = "OTalk"

pinnedArticlePidAndAuthor, err := c.GetPinnedArticlePidAndAuthor(columnName)
pinnedArticlePidAndAuthor, err := Zhihu.GetPinnedArticlePidAndAuthor(columnName)
if err != nil {
log.Println(err)
os.Exit(1)
}

fmt.Printf("%+v\n", *pinnedArticlePidAndAuthor)

pinnedArticle, err := c.GetSingleArticle(pinnedArticlePidAndAuthor.ID)
pinnedArticle, err := Zhihu.GetSingleArticle(pinnedArticlePidAndAuthor.ID)
if err != nil {
log.Println(err)
os.Exit(1)
}

fmt.Printf("%+v\n", *pinnedArticle)

pids, err := c.GetArticlesListPids(columnName)
pids, err := Zhihu.GetArticlesListPids(columnName)
if err != nil {
log.Println(err)
os.Exit(1)
Expand All @@ -63,7 +44,7 @@ func main() {
if pid == pinnedArticle.ID {
continue
}
article, err := c.GetSingleArticle(pid)
article, err := Zhihu.GetSingleArticle(pid)
if err != nil {
log.Println(err)
os.Exit(1)
Expand All @@ -72,8 +53,3 @@ func main() {
}
}
```

## credits

httpClient的代码来自 https://github.com/mozillazg/go-cos/blob/master/debug/http.go 感谢🙏

46 changes: 7 additions & 39 deletions client.go
Original file line number Diff line number Diff line change
@@ -1,50 +1,18 @@
package ZhihuZhuanlanCrawler

import (
"io/ioutil"
"log"
"net/http"
"github.com/imroc/req"
)

type Client struct {
client *http.Client
}

func NewClient(httpClient *http.Client) *Client {
if httpClient == nil {
httpClient = &http.Client{}
}

c := &Client{
client: httpClient,
}

return c
}

func (c *Client) SendNewZhihuRequest(u string) ([]byte, error) {
req, err := http.NewRequest("GET", u, nil)
if err != nil {
return nil, err
}

req.Header.Add("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36")
req.Header.Add("Host", "zhuanlan.zhihu.com")
req.Header.Add("Referer", "https://zhuanlan.zhihu.com/")

res, err := c.client.Do(req)
if err != nil {
log.Println(err)
return nil, err
}

defer res.Body.Close()
func sendNewZhihuRequest(u string) ([]byte, error) {
r, err := req.Get(u, req.Header{
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36",
"Referer": "https://zhuanlan.zhihu.com/",
}, nil)

bodyByte, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Println(err)
return nil, err
}

return bodyByte, nil
return r.ToBytes()
}
14 changes: 7 additions & 7 deletions crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import (
"log"
)

func (c *Client) GetPinnedArticlePidAndAuthor(columnName string) (*PinnedArticleAndAuthor, error) {
func GetPinnedArticlePidAndAuthor(columnName string) (*PinnedArticleAndAuthor, error) {
if columnName == "" {
return nil, ColumnNameCanNotBeEmpty
}
u := fmt.Sprintf("https://zhuanlan.zhihu.com/api/columns/%s/pinned-article", columnName)
res, err := c.SendNewZhihuRequest(u)
res, err := sendNewZhihuRequest(u)
if err != nil {
log.Println(err)
return nil, err
Expand All @@ -27,12 +27,12 @@ func (c *Client) GetPinnedArticlePidAndAuthor(columnName string) (*PinnedArticle
return &pinnedArticleAndAuthor, nil
}

func (c *Client) GetSingleArticle(pid int) (*Article, error) {
func GetSingleArticle(pid int) (*Article, error) {
if pid == 0 {
return nil, PidCanNotBeEmpty
}
u := fmt.Sprintf("https://api.zhihu.com/articles/%d", pid)
res, err := c.SendNewZhihuRequest(u)
res, err := sendNewZhihuRequest(u)
if err != nil {
log.Println(err)
return nil, err
Expand All @@ -48,7 +48,7 @@ func (c *Client) GetSingleArticle(pid int) (*Article, error) {
return &article, nil
}

func (c *Client) GetArticlesListPids(columnName string) ([]int, error) {
func GetArticlesListPids(columnName string) ([]int, error) {
if columnName == "" {
return nil, ColumnNameCanNotBeEmpty
}
Expand All @@ -57,7 +57,7 @@ func (c *Client) GetArticlesListPids(columnName string) ([]int, error) {
var offset = 0

u := fmt.Sprintf("https://zhuanlan.zhihu.com/api/columns/%s/articles?limit=%d&offset=%d", columnName, limit, offset)
res, err := c.SendNewZhihuRequest(u)
res, err := sendNewZhihuRequest(u)
if err != nil {
log.Println(err)
return nil, err
Expand All @@ -78,7 +78,7 @@ func (c *Client) GetArticlesListPids(columnName string) ([]int, error) {

for offset = offset + limit; offset < articleList.Paging.Totals; offset = offset + limit {
u := fmt.Sprintf("https://zhuanlan.zhihu.com/api/columns/%s/articles?limit=%d&offset=%d", columnName, limit, offset)
res, err := c.SendNewZhihuRequest(u)
res, err := sendNewZhihuRequest(u)
if err != nil {
log.Println(err)
return nil, err
Expand Down
19 changes: 4 additions & 15 deletions crawler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,25 @@ package ZhihuZhuanlanCrawler
import "testing"

const columnName = "OTalk" // https://zhuanlan.zhihu.com/Otalk
const pid = 41604227 // https://zhuanlan.zhihu.com/p/41604227

const pid = 60968502 // https://zhuanlan.zhihu.com/p/60968502

func TestClient_GetPinnedArticlePidAndAuthor(t *testing.T) {
c := NewClient(nil)
_, err := c.GetPinnedArticlePidAndAuthor(columnName)
_, err := GetPinnedArticlePidAndAuthor(columnName)
if err != nil {
t.Error(err.Error())
}

t.Log("GetPinnedArticlePidAndAuthor ok")

}

func TestClient_GetArticlesListPids(t *testing.T) {
c := NewClient(nil)
_, err := c.GetArticlesListPids(columnName)
_, err := GetArticlesListPids(columnName)
if err != nil {
t.Error(err.Error())
}

t.Log("GetArticlesListPids ok")
}

func TestClient_GetSingleArticle(t *testing.T) {
c := NewClient(nil)
_, err := c.GetSingleArticle(pid)
_, err := GetSingleArticle(pid)
if err != nil {
t.Error(err.Error())
}

t.Log("GetSingleArticle ok")
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
module github.com/solarhell/ZhihuZhuanlanCrawler

go 1.12

require github.com/imroc/req v0.2.4
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/imroc/req v0.2.4 h1:8XbvaQpERLAJV6as/cB186DtH5f0m5zAOtHEaTQ4ac0=
github.com/imroc/req v0.2.4/go.mod h1:J9FsaNHDTIVyW/b5r6/Df5qKEEEq2WzZKIgKSajd1AE=
70 changes: 0 additions & 70 deletions httpclient.go

This file was deleted.

78 changes: 0 additions & 78 deletions httpclient_test.go

This file was deleted.

Loading

0 comments on commit 3ecf7c8

Please sign in to comment.