-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathutils.go
179 lines (162 loc) · 4.86 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package main
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
twitterscraper "github.com/n0madic/twitter-scraper"
)
// ByLikeAndRetweet is a slice of tweet results that carries the Len, Less and
// Swap methods (the method set of sort.Interface). Its Less method ranks a
// tweet with a larger Likes+Retweets total ahead of one with a smaller total.
type ByLikeAndRetweet []*twitterscraper.TweetResult
// removeDuplicateValues returns a new slice holding the first occurrence of
// every tweet ID found in intSlice, in the order those first occurrences
// appear. The returned slice is never nil, even for an empty input.
func removeDuplicateValues(intSlice []*twitterscraper.TweetResult) []*twitterscraper.TweetResult {
	seen := make(map[string]bool)
	unique := []*twitterscraper.TweetResult{}
	for idx := range intSlice {
		tweet := intSlice[idx]
		if seen[tweet.ID] {
			// This ID was already kept; drop the repeat.
			continue
		}
		seen[tweet.ID] = true
		unique = append(unique, tweet)
	}
	return unique
}
// isFileEmpty reports whether the file at path contains no data.
//
// It opens the file read-only and tries to read a single byte:
//   - if the read hits io.EOF, the file is empty and (true, nil) is returned;
//   - if the read yields a byte, (false, nil) is returned;
//   - any other open or read failure is returned as (false, err).
//
// The file is closed before the function returns.
func isFileEmpty(path string) (bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return false, err
	}
	// The file is only read, so a Close failure carries nothing worth
	// reporting; the point is to release the descriptor on every return path.
	defer f.Close()

	b := make([]byte, 1)
	n, err := f.Read(b)
	if err != nil {
		if errors.Is(err, io.EOF) {
			return true, nil
		}
		return false, err
	}
	// Compare only the bytes that were actually read. Checking the whole
	// one-byte buffer against an empty slice can never match, whereas b[:n] is
	// empty exactly when the read returned no data without reporting an error.
	return bytes.Equal(b[:n], []byte{}), nil
}
// loadTweetsFromFile decodes a JSON array of tweets from the file at path.
//
// The file is first checked with isFileEmpty: a failure of that check, or an
// empty file, is reported as an error. Otherwise the file is opened and a
// single JSON value is decoded into a slice of *Tweet.
//
// Returns the decoded tweets and the decoder's error (nil on success). When
// the file cannot be opened the slice is nil.
func loadTweetsFromFile(path string) ([]*Tweet, error) {
	// NOTE(review): the message texts below are kept exactly as they were
	// (including the "emtpy" spelling) in case anything matches on them.
	if empty, err := isFileEmpty(path); err != nil {
		return nil, fmt.Errorf("error occurred during check emptiness of %s file - %w", path, err)
	} else if empty {
		return nil, fmt.Errorf("the %s file is emtpy", path)
	}

	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Register the Close only once the open is known to have succeeded, so it
	// is never invoked on a nil *os.File.
	defer f.Close()

	tweets := []*Tweet{}
	err = json.NewDecoder(f).Decode(&tweets)
	return tweets, err
}
// dumpTweetsToFile appends tweets, encoded as one JSON value, to the file at
// path.
//
// The file is opened write-only in append mode without O_CREATE, so it must
// already exist; otherwise the open error is returned. The file is closed
// before returning, and a Close failure after a successful write is reported
// because it can mean the data did not reach the file.
func dumpTweetsToFile(path string, tweets []*Tweet) error {
	// The permission argument is only consulted when a file is created, which
	// never happens here because O_CREATE is not set.
	f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	if err := json.NewEncoder(f).Encode(tweets); err != nil {
		// The encode error is the more useful one; still release the file.
		f.Close()
		return err
	}
	return f.Close()
}
// loadTweetsIDFromFile returns the ID of every tweet stored in the file at
// path, in file order. Any error from loadTweetsFromFile is returned as-is
// together with a nil slice.
//
// TODO: decode only the ID field instead of loading every tweet in full and
// then discarding the other fields.
func loadTweetsIDFromFile(path string) ([]string, error) {
	stored, err := loadTweetsFromFile(path)
	if err != nil {
		return nil, err
	}

	ids := []string{}
	for _, tweet := range stored {
		ids = append(ids, tweet.ID)
	}
	return ids, nil
}
// UNUSED
// func getTweetsIDs(tweets []*Tweet) []string {
// list := []string{}
// for i := range tweets {
// list = append(list, tweets[i].ID)
// }
// return list
// }
// isIDExistInIDs reports whether id is one of the elements of ids. A nil or
// empty ids never matches.
//
// TODO: replace the linear scan with a binary search.
func isIDExistInIDs(id string, ids []string) bool {
	for _, candidate := range ids {
		if candidate == id {
			return true
		}
	}
	return false
}
// fetchTweets searches Twitter one day at a time, from START_DATE until the
// current time, using the "برای #مهسا_امینی" query with retweets filtered out,
// and keeps the tweets whose text starts with "برای".
//
// stored_tweets_id lists the IDs of tweets that were already fetched; a result
// whose ID is in that list is counted but not returned. A nil or empty slice
// disables this filtering.
//
// Returns:
//   - the number of results received from the scraper, kept or not,
//   - the newly fetched tweets that passed the prefix filter,
//   - an error if any search result carries one; in that case the count and
//     the tweets gathered so far are discarded (0 and nil are returned).
//
// Per-day progress is printed to standard output.
func fetchTweets(stored_tweets_id []string) (int, []*twitterscraper.TweetResult, error) {
	// Layout used both in the search query and in the progress output.
	const dayLayout = "2006-1-2"

	// Index the stored IDs once so every fetched tweet is checked with a map
	// lookup instead of a scan over the whole list.
	storedIDs := make(map[string]struct{}, len(stored_tweets_id))
	for _, id := range stored_tweets_id {
		storedIDs[id] = struct{}{}
	}

	// Every result the scraper produced, not only the ones that are kept.
	totalCount := 0
	// Tweets that passed all filters.
	tweets := []*twitterscraper.TweetResult{}

	// The search window is [startDay, nextDay) and moves forward one day per
	// iteration.
	startDay := START_DATE
	nextDay := startDay.Add(24 * time.Hour)
	today := time.Now()

	scraper := twitterscraper.New()

	for today.After(startDay) {
		fmt.Println("The timeframe is from ", startDay.Format(dayLayout), " until ", nextDay.Format(dayLayout)+":")

		// Results received for this window, kept or not.
		totalCountToday := 0
		// Results of this window that passed the filters.
		totalPassedToday := 0

		query := "برای #مهسا_امینی -filter:retweets " + "since:" + startDay.Format(dayLayout) + " until:" + nextDay.Format(dayLayout)
		for tweet := range scraper.SearchTweets(context.Background(), query, 10000) {
			totalCount++
			totalCountToday++

			// Abort the whole fetch on the first failed result.
			if tweet.Error != nil {
				return 0, nil, fmt.Errorf("error occurred during get tweets - %w", tweet.Error)
			}

			// Skip tweets that were already fetched earlier.
			if _, stored := storedIDs[tweet.ID]; stored {
				continue
			}

			// The search matches the word anywhere; keep only tweets that
			// begin with it.
			if strings.HasPrefix(tweet.Text, "برای") {
				tweets = append(tweets, tweet)
				totalPassedToday++
			}
		}

		fmt.Println("Total tweets fetched at " + startDay.Format(dayLayout) + " is: " + strconv.Itoa(totalCountToday))
		fmt.Println("Total \"برای\" tweets fetched at " + startDay.Format(dayLayout) + " is: " + strconv.Itoa(totalPassedToday) + "\n")

		// Advance the window by one day.
		startDay = nextDay
		nextDay = startDay.Add(24 * time.Hour)
	}

	return totalCount, tweets, nil
}
// ByLikeAndRetweet methods

// Len returns the number of tweets in the slice.
func (a ByLikeAndRetweet) Len() int {
	return len(a)
}
// Less reports whether the tweet at index i has a strictly larger
// Likes+Retweets total than the tweet at index j, so tweets with the higher
// total are ordered first.
func (a ByLikeAndRetweet) Less(i, j int) bool {
	left, right := a[i], a[j]
	return right.Likes+right.Retweets < left.Likes+left.Retweets
}
// Swap exchanges the tweets at indexes i and j.
func (a ByLikeAndRetweet) Swap(i, j int) {
	held := a[i]
	a[i] = a[j]
	a[j] = held
}