Commit 28ee850: Move protos to shared/proto

daviddengcn committed Apr 15, 2018
1 parent d320a1b
Showing 29 changed files with 848 additions and 485 deletions.
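
The change is mechanical throughout: the two generated proto packages, previously imported under separate aliases, are merged into a single shared package under one alias. A minimal before/after sketch of the import block (surrounding imports omitted):

Before:

    import (
        sppb "github.com/daviddengcn/gcse/proto/spider"
        stpb "github.com/daviddengcn/gcse/proto/store"
    )

After:

    import (
        gpb "github.com/daviddengcn/gcse/shared/proto"
    )

Types formerly split across the two packages (sppb.RepoInfo, sppb.FolderInfo, stpb.PackageInfo, stpb.Repository, and so on) are now all referenced through gpb.
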
13 changes: 6 additions & 7 deletions crawler.go
@@ -33,8 +33,7 @@ import (
"github.com/daviddengcn/sophie"
"github.com/golang/gddo/gosrc"

sppb "github.com/daviddengcn/gcse/proto/spider"
stpb "github.com/daviddengcn/gcse/proto/store"
gpb "github.com/daviddengcn/gcse/shared/proto"
glgddo "github.com/golang/gddo/doc"
)

@@ -350,7 +349,7 @@ var GithubSpider *github.Spider

const maxRepoInfoAge = 2 * timep.Day

-func CrawlRepoInfo(ctx context.Context, site, user, name string) *sppb.RepoInfo {
+func CrawlRepoInfo(ctx context.Context, site, user, name string) *gpb.RepoInfo {
// Check cache in store.
path := user + "/" + name
p, err := store.ReadPackage(site, path)
@@ -373,7 +372,7 @@ func CrawlRepoInfo(ctx context.Context, site, user, name string) *sppb.RepoInfo
}
return nil
}
-if err := store.UpdatePackage(site, path, func(info *stpb.PackageInfo) error {
+if err := store.UpdatePackage(site, path, func(info *gpb.PackageInfo) error {
info.RepoInfo = ri
return nil
}); err != nil {
@@ -392,7 +391,7 @@ func getGithubStars(ctx context.Context, user, name string) int {
return -1
}

-func getGithub(ctx context.Context, pkg string) (*doc.Package, []*sppb.FolderInfo, error) {
+func getGithub(ctx context.Context, pkg string) (*doc.Package, []*gpb.FolderInfo, error) {
parts := strings.SplitN(pkg, "/", 4)
for len(parts) < 4 {
parts = append(parts, "")
@@ -421,7 +420,7 @@ func getGithub(ctx context.Context, pkg string) (*doc.Package, []*sppb.FolderInf
}, folders, nil
}

-func CrawlPackage(ctx context.Context, httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*sppb.FolderInfo, err error) {
+func CrawlPackage(ctx context.Context, httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*gpb.FolderInfo, err error) {
defer func() {
if perr := recover(); perr != nil {
p, folders, err = nil, nil, errorsp.NewWithStacks("Panic when crawling package %s: %v", pkg, perr)
@@ -537,7 +536,7 @@ func CrawlPerson(ctx context.Context, httpClient doc.HttpClient, id string) (*Pe
for name, ri := range u.Repos {
path := user + "/" + name
p.Packages = append(p.Packages, "github.com/"+path)
-if err := store.UpdatePackage(site, path, func(info *stpb.PackageInfo) error {
+if err := store.UpdatePackage(site, path, func(info *gpb.PackageInfo) error {
info.RepoInfo = ri
return nil
}); err != nil {
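
The recurring call shape in this file is store.UpdatePackage with a mutation callback: the store loads or creates the PackageInfo record for site/path, hands it to the closure, and persists the mutated record when the closure returns nil. A minimal sketch of a caller under the new alias (setRepoInfo is a hypothetical helper written for illustration; only UpdatePackage, PackageInfo, and RepoInfo come from the diff):

    import (
        "log"

        "github.com/daviddengcn/gcse/store"

        gpb "github.com/daviddengcn/gcse/shared/proto"
    )

    // setRepoInfo attaches freshly crawled repository metadata to the stored
    // package record, mirroring the read-modify-write pattern in crawler.go.
    func setRepoInfo(site, path string, ri *gpb.RepoInfo) {
        if err := store.UpdatePackage(site, path, func(info *gpb.PackageInfo) error {
            info.RepoInfo = ri // mutate the record in place
            return nil         // nil commits the change (an error presumably aborts it)
        }); err != nil {
            log.Printf("UpdatePackage %s %s failed: %v", site, path, err)
        }
    }
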
23 changes: 11 additions & 12 deletions pipelines/crawler/package.go
@@ -22,8 +22,7 @@ import (
"github.com/daviddengcn/sophie/kv"
"github.com/daviddengcn/sophie/mr"

sppb "github.com/daviddengcn/gcse/proto/spider"
stpb "github.com/daviddengcn/gcse/proto/store"
gpb "github.com/daviddengcn/gcse/shared/proto"
)

const (
@@ -47,17 +46,17 @@ func appendNewPackage(pkg, foundWay string) {
cDB.AppendPackage(pkg, allDocsPkgs.Contain)

site, path := utils.SplitPackage(pkg)
-if err := store.UpdatePackage(site, path, func(*stpb.PackageInfo) error {
+if err := store.UpdatePackage(site, path, func(*gpb.PackageInfo) error {
return nil
}); err != nil {
log.Printf("UpdatePackage %s %s failed: %v", site, path, err)
}
-if err := store.AppendPackageEvent(site, path, foundWay, time.Now(), sppb.HistoryEvent_Action_None); err != nil {
+if err := store.AppendPackageEvent(site, path, foundWay, time.Now(), gpb.HistoryEvent_Action_None); err != nil {
log.Printf("UpdatePackageHistory %s %s failed: %v", site, path, err)
}
}

-func fillPackageInfo(p *gcse.Package, pi *stpb.PackageInfo) {
+func fillPackageInfo(p *gcse.Package, pi *gpb.PackageInfo) {
pi.Package = p.Package
pi.Name = p.Name
pi.Synopsis = p.Synopsis
@@ -84,7 +83,7 @@ func fillPackageInfo(p *gcse.Package, pi *stpb.PackageInfo) {
}
}

-func saveRelatedInfo(pi *stpb.PackageInfo) {
+func saveRelatedInfo(pi *gpb.PackageInfo) {
// append new authors
var site, id string
if strings.HasPrefix(pi.Package, "github.com/") {
@@ -94,7 +93,7 @@ func saveRelatedInfo(pi *stpb.PackageInfo) {
} else {
return
}
-if err := store.UpdatePerson(site, id, func(*stpb.PersonInfo) error {
+if err := store.UpdatePerson(site, id, func(*gpb.PersonInfo) error {
// TODO update history
return nil
}); err != nil {
@@ -192,7 +191,7 @@ func (pc *PackageCrawler) Map(key, val sophie.SophieWriter, c []sophie.Collector
if err != nil && errorsp.Cause(err) != gcse.ErrPackageNotModifed {
log.Printf("[Part %d] Crawling pkg %s failed: %v", pc.part, pkg, err)
if gcse.IsBadPackage(err) {
-utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Invalid), "AppendPackageEvent %v %v failed", site, path)
+utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), gpb.HistoryEvent_Action_Invalid), "AppendPackageEvent %v %v failed", site, path)
bi.AddValue(bi.Sum, "crawler.package.wrong-package", 1)
// a wrong path
nda := gcse.NewDocAction{
@@ -202,7 +201,7 @@ func (pc *PackageCrawler) Map(key, val sophie.SophieWriter, c []sophie.Collector
cDB.PackageDB.Delete(pkg)
log.Printf("[Part %d] Remove wrong package %s", pc.part, pkg)
} else {
-utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Failed), "AppendPackageEvent %v %v failed", site, path)
+utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), gpb.HistoryEvent_Action_Failed), "AppendPackageEvent %v %v failed", site, path)
bi.Inc("crawler.package.failed")
if strings.HasPrefix(pkg, "github.com/") {
bi.Inc("crawler.package.failed.github")
@@ -227,7 +226,7 @@ func (pc *PackageCrawler) Map(key, val sophie.SophieWriter, c []sophie.Collector
}
return nil
}
-utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Success), "AppendPackageEvent %v %v failed", site, path)
+utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), gpb.HistoryEvent_Action_Success), "AppendPackageEvent %v %v failed", site, path)
pc.failCount = 0
if errorsp.Cause(err) == gcse.ErrPackageNotModifed {
// TODO crawling stars for unchanged project
@@ -242,8 +241,8 @@ func (pc *PackageCrawler) Map(key, val sophie.SophieWriter, c []sophie.Collector
}
log.Printf("[Part %d] Crawled package %s success!", pc.part, pkg)

-var pkgInfo *stpb.PackageInfo
-if err := store.UpdatePackage(site, path, func(pi *stpb.PackageInfo) error {
+var pkgInfo *gpb.PackageInfo
+if err := store.UpdatePackage(site, path, func(pi *gpb.PackageInfo) error {
fillPackageInfo(p, pi)
pkgInfo = pi
return nil
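
Each crawl outcome in this file is recorded as a history event through store.AppendPackageEvent, using one gpb.HistoryEvent action per case: Action_None when a package is first appended, Action_Invalid for a bad import path, Action_Failed for a crawl error, and Action_Success otherwise. A small sketch of that dispatch under the new alias (recordCrawlOutcome is a hypothetical helper; the utils import path is inferred from the utils.LogError calls above):

    import (
        "time"

        "github.com/daviddengcn/gcse/store"
        "github.com/daviddengcn/gcse/utils"

        gpb "github.com/daviddengcn/gcse/shared/proto"
    )

    // recordCrawlOutcome maps a crawl result onto the package's event history.
    func recordCrawlOutcome(site, path string, ok bool) {
        action := gpb.HistoryEvent_Action_Failed
        if ok {
            action = gpb.HistoryEvent_Action_Success
        }
        // foundWay is the empty string for recrawls, as in the calls above.
        utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), action),
            "AppendPackageEvent %v %v failed", site, path)
    }
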
18 changes: 9 additions & 9 deletions pipelines/spider/spider.go
@@ -10,20 +10,20 @@ import (
"github.com/golangplus/time"

"github.com/daviddengcn/gcse/configs"
"github.com/daviddengcn/gcse/proto/spider"
"github.com/daviddengcn/gcse/proto/store"
"github.com/daviddengcn/gcse/spider/github"
"github.com/daviddengcn/gcse/store"

gpb "github.com/daviddengcn/gcse/shared/proto"
)

type RepositoryInfo struct {
-*stpb.Repository
+*gpb.Repository

User string
Name string
}

-func needCrawl(r *stpb.Repository) bool {
+func needCrawl(r *gpb.Repository) bool {
if r.CrawlingInfo == nil {
return true
}
@@ -47,7 +47,7 @@ func selectRepos(site string, maxCrawl int) ([]*RepositoryInfo, error) {
repos := heap.NewInterfaces(func(x, y interface{}) bool {
return shouldCrawlLater(x.(*RepositoryInfo), y.(*RepositoryInfo))
}, maxCrawl)
-if err := store.ForEachRepositoryOfSite(site, func(user, name string, doc *stpb.Repository) error {
+if err := store.ForEachRepositoryOfSite(site, func(user, name string, doc *gpb.Repository) error {
if !needCrawl(doc) {
return nil
}
@@ -75,7 +75,7 @@ func crawlRepo(ctx context.Context, site string, repo *RepositoryInfo) error {
if site != "github.com" {
return errorsp.NewWithStacks("Cannot crawl the repository in %v", site)
}
-repo.CrawlingInfo = &sppb.CrawlingInfo{}
+repo.CrawlingInfo = &gpb.CrawlingInfo{}
repo.CrawlingInfo.SetCrawlingTime(now())

sha, err := githubSpider.RepoBranchSHA(ctx, repo.User, repo.Name, repo.Branch)
@@ -87,8 +87,8 @@
}
repo.Signature = sha

-repo.Packages = make(map[string]*sppb.Package)
-if err := githubSpider.ReadRepo(ctx, repo.User, repo.Name, repo.Signature, func(path string, doc *sppb.Package) error {
+repo.Packages = make(map[string]*gpb.Package)
+if err := githubSpider.ReadRepo(ctx, repo.User, repo.Name, repo.Signature, func(path string, doc *gpb.Package) error {
log.Printf("Package: %v", doc)
repo.Packages[path] = doc
return nil
@@ -106,7 +106,7 @@ func crawlAndSaveRepo(ctx context.Context, site string, repo *RepositoryInfo) er
}
return err
}
-return store.UpdateRepository(site, repo.User, repo.Name, func(doc *stpb.Repository) error {
+return store.UpdateRepository(site, repo.User, repo.Name, func(doc *gpb.Repository) error {
*doc = *repo.Repository
return nil
})
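
Worth noting in this file: RepositoryInfo embeds the generated *gpb.Repository, so the proto's fields are promoted and crawlRepo can assign repo.CrawlingInfo, repo.Signature, and repo.Packages directly while carrying the site-level User/Name pair alongside. A minimal sketch of that wrapping (newRepositoryInfo and the example values are hypothetical, for illustration only):

    // newRepositoryInfo pairs a stored proto record with its owning user and
    // repository name; field promotion lets callers touch the proto's fields
    // through the wrapper.
    func newRepositoryInfo(user, name string, r *gpb.Repository) *RepositoryInfo {
        return &RepositoryInfo{Repository: r, User: user, Name: name}
    }

    // Usage sketch:
    //   repo := newRepositoryInfo("daviddengcn", "gcse", &gpb.Repository{Branch: "master"})
    //   repo.Signature = "0123abcd" // promoted from the embedded *gpb.Repository
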
(The remaining 26 changed files are not shown.)
