Replace the removed Selenium helpers find_element_by_xpath / find_elements_by_xpath
with the locator-based calls find_element('xpath', value) / find_elements('xpath', value).

Why this differs from the patch as first written: the added lines called
find_element_by(...) / find_elements_by(...), which are not methods of WebDriver
or WebElement. Inside get_user_information and get_data the resulting
AttributeError is swallowed by the surrounding "except Exception" / bare "except"
handlers, so those lookups fall back to "return" or to their defaults; elsewhere
(log_in, keep_scroling, get_users_follow, check_exists_by_xpath) it propagates.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
breaks and blank context lines were placed so that every hunk matches the line
counts in its @@ header; indentation widths are reconstructed and should be
checked against the target files before applying.
NOTE(review): the "index" lines are carried over from the original patch; the
post-image blob ids were not recomputed for the corrected "+" lines.
NOTE(review): build/lib/ looks like a generated build directory - confirm that it
is meant to be patched by hand rather than rebuilt.

diff --git a/Scweet/user.py b/Scweet/user.py
index 40a5f3b..7795e36 100644
--- a/Scweet/user.py
+++ b/Scweet/user.py
@@ -18,40 +18,46 @@ def get_user_information(users, driver=None, headless=True):
         if user is not None:
 
             try:
-                following = driver.find_element_by_xpath(
+                following = driver.find_element(
+                    'xpath',
                     '//a[contains(@href,"/following")]/span[1]/span[1]').text
-                followers = driver.find_element_by_xpath(
+                followers = driver.find_element('xpath',
                     '//a[contains(@href,"/followers")]/span[1]/span[1]').text
             except Exception as e:
                 # print(e)
                 return
 
             try:
-                element = driver.find_element_by_xpath('//div[contains(@data-testid,"UserProfileHeader_Items")]//a[1]')
+                element = driver.find_element('xpath', '//div[contains(@data-testid,"UserProfileHeader_Items")]//a[1]')
                 website = element.get_attribute("href")
             except Exception as e:
                 # print(e)
                 website = ""
 
             try:
-                desc = driver.find_element_by_xpath('//div[contains(@data-testid,"UserDescription")]').text
+                desc = driver.find_element('xpath', '//div[contains(@data-testid,"UserDescription")]').text
             except Exception as e:
                 # print(e)
                 desc = ""
             a = 0
             try:
-                join_date = driver.find_element_by_xpath(
+                join_date = driver.find_element(
+                    'xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[3]').text
-                birthday = driver.find_element_by_xpath(
+                birthday = driver.find_element(
+                    'xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[2]').text
-                location = driver.find_element_by_xpath(
+                location = driver.find_element(
+                    'xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
             except Exception as e:
                 # print(e)
                 try:
-                    join_date = driver.find_element_by_xpath(
+                    join_date = driver.find_element(
+                        'xpath',
                         '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[2]').text
-                    span1 = driver.find_element_by_xpath(
+                    span1 = driver.find_element(
+                        'xpath',
                         '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
                     if hasNumbers(span1):
                         birthday = span1
@@ -62,7 +68,8 @@ def get_user_information(users, driver=None, headless=True):
                 except Exception as e:
                     # print(e)
                     try:
-                        join_date = driver.find_element_by_xpath(
+                        join_date = driver.find_element(
+                            'xpath',
                             '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
                         birthday = ""
                         location = ""
diff --git a/build/lib/Scweet/user.py b/build/lib/Scweet/user.py
index 40a5f3b..1f1fac9 100644
--- a/build/lib/Scweet/user.py
+++ b/build/lib/Scweet/user.py
@@ -18,40 +18,44 @@ def get_user_information(users, driver=None, headless=True):
         if user is not None:
 
             try:
-                following = driver.find_element_by_xpath(
+                following = driver.find_element(
+                    'xpath',
                     '//a[contains(@href,"/following")]/span[1]/span[1]').text
-                followers = driver.find_element_by_xpath(
+                followers = driver.find_element(
+                    'xpath',
                     '//a[contains(@href,"/followers")]/span[1]/span[1]').text
             except Exception as e:
                 # print(e)
                 return
 
             try:
-                element = driver.find_element_by_xpath('//div[contains(@data-testid,"UserProfileHeader_Items")]//a[1]')
+                element = driver.find_element('xpath', '//div[contains(@data-testid,"UserProfileHeader_Items")]//a[1]')
                 website = element.get_attribute("href")
             except Exception as e:
                 # print(e)
                 website = ""
 
             try:
-                desc = driver.find_element_by_xpath('//div[contains(@data-testid,"UserDescription")]').text
+                desc = driver.find_element('xpath', '//div[contains(@data-testid,"UserDescription")]').text
             except Exception as e:
                 # print(e)
                 desc = ""
             a = 0
             try:
-                join_date = driver.find_element_by_xpath(
+                join_date = driver.find_element('xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[3]').text
-                birthday = driver.find_element_by_xpath(
+                birthday = driver.find_element('xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[2]').text
-                location = driver.find_element_by_xpath(
+                location = driver.find_element('xpath',
                     '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
             except Exception as e:
                 # print(e)
                 try:
-                    join_date = driver.find_element_by_xpath(
+                    join_date = driver.find_element(
+                        'xpath',
                         '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[2]').text
-                    span1 = driver.find_element_by_xpath(
+                    span1 = driver.find_element(
+                        'xpath',
                         '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
                     if hasNumbers(span1):
                         birthday = span1
@@ -62,7 +66,8 @@ def get_user_information(users, driver=None, headless=True):
                 except Exception as e:
                     # print(e)
                     try:
-                        join_date = driver.find_element_by_xpath(
+                        join_date = driver.find_element(
+                            'xpath',
                             '//div[contains(@data-testid,"UserProfileHeader_Items")]/span[1]').text
                         birthday = ""
                         location = ""
diff --git a/build/lib/Scweet/utils.py b/build/lib/Scweet/utils.py
index ef3cbf8..e1fa60d 100644
--- a/build/lib/Scweet/utils.py
+++ b/build/lib/Scweet/utils.py
@@ -29,49 +29,49 @@ def get_data(card, save_images=False, save_dir=None):
     image_links = []
 
     try:
-        username = card.find_element_by_xpath('.//span').text
+        username = card.find_element('xpath', './/span').text
     except:
         return
 
     try:
-        handle = card.find_element_by_xpath('.//span[contains(text(), "@")]').text
+        handle = card.find_element('xpath', './/span[contains(text(), "@")]').text
     except:
         return
 
     try:
-        postdate = card.find_element_by_xpath('.//time').get_attribute('datetime')
+        postdate = card.find_element('xpath', './/time').get_attribute('datetime')
     except:
         return
 
     try:
-        text = card.find_element_by_xpath('.//div[2]/div[2]/div[1]').text
+        text = card.find_element('xpath', './/div[2]/div[2]/div[1]').text
     except:
         text = ""
 
     try:
-        embedded = card.find_element_by_xpath('.//div[2]/div[2]/div[2]').text
+        embedded = card.find_element('xpath', './/div[2]/div[2]/div[2]').text
     except:
         embedded = ""
 
     # text = comment + embedded
 
     try:
-        reply_cnt = card.find_element_by_xpath('.//div[@data-testid="reply"]').text
+        reply_cnt = card.find_element('xpath', './/div[@data-testid="reply"]').text
     except:
         reply_cnt = 0
 
     try:
-        retweet_cnt = card.find_element_by_xpath('.//div[@data-testid="retweet"]').text
+        retweet_cnt = card.find_element('xpath', './/div[@data-testid="retweet"]').text
     except:
         retweet_cnt = 0
 
     try:
-        like_cnt = card.find_element_by_xpath('.//div[@data-testid="like"]').text
+        like_cnt = card.find_element('xpath', './/div[@data-testid="like"]').text
     except:
         like_cnt = 0
 
     try:
-        elements = card.find_elements_by_xpath('.//div[2]/div[2]//img[contains(@src, "https://pbs.twimg.com/")]')
+        elements = card.find_elements('xpath', './/div[2]/div[2]//img[contains(@src, "https://pbs.twimg.com/")]')
         for element in elements:
             image_links.append(element.get_attribute('src'))
     except:
@@ -83,7 +83,7 @@ def get_data(card, save_images=False, save_dir=None):
 
     # handle promoted tweets
     try:
-        promoted = card.find_element_by_xpath('.//div[2]/div[2]/[last()]//span').text == "Promoted"
+        promoted = card.find_element('xpath', './/div[2]/div[2]/[last()]//span').text == "Promoted"
     except:
         promoted = False
     if promoted:
@@ -91,7 +91,7 @@ def get_data(card, save_images=False, save_dir=None):
 
     # get a string of all emojis contained in the tweet
     try:
-        emoji_tags = card.find_elements_by_xpath('.//img[contains(@src, "emoji")]')
+        emoji_tags = card.find_elements('xpath', './/img[contains(@src, "emoji")]')
     except:
         return
     emoji_list = []
@@ -107,7 +107,7 @@ def get_data(card, save_images=False, save_dir=None):
 
     # tweet url
     try:
-        element = card.find_element_by_xpath('.//a[contains(@href, "/status/")]')
+        element = card.find_element('xpath', './/a[contains(@href, "/status/")]')
         tweet_url = element.get_attribute('href')
     except:
         return
@@ -235,7 +235,7 @@ def log_in(driver, env, timeout=20, wait=4):
     sleep(random.uniform(wait, wait + 1))
 
     # enter email
-    email_el = driver.find_element_by_xpath(email_xpath)
+    email_el = driver.find_element('xpath', email_xpath)
     sleep(random.uniform(wait, wait + 1))
     email_el.send_keys(email)
     sleep(random.uniform(wait, wait + 1))
@@ -243,14 +243,14 @@ def log_in(driver, env, timeout=20, wait=4):
     sleep(random.uniform(wait, wait + 1))
     # in case twitter spotted unusual login activity : enter your username
     if check_exists_by_xpath(username_xpath, driver):
-        username_el = driver.find_element_by_xpath(username_xpath)
+        username_el = driver.find_element('xpath', username_xpath)
         sleep(random.uniform(wait, wait + 1))
         username_el.send_keys(username)
         sleep(random.uniform(wait, wait + 1))
         username_el.send_keys(Keys.RETURN)
         sleep(random.uniform(wait, wait + 1))
     # enter password
-    password_el = driver.find_element_by_xpath(password_xpath)
+    password_el = driver.find_element('xpath', password_xpath)
     password_el.send_keys(password)
     sleep(random.uniform(wait, wait + 1))
     password_el.send_keys(Keys.RETURN)
@@ -270,7 +270,7 @@ def keep_scroling(driver, data, writer, tweet_ids, scrolling, tweet_parsed, limi
     while scrolling and tweet_parsed < limit:
         sleep(random.uniform(0.5, 1.5))
         # get the card of tweets
-        page_cards = driver.find_elements_by_xpath('//article[@data-testid="tweet"]')  # changed div by article
+        page_cards = driver.find_elements('xpath', '//article[@data-testid="tweet"]')  # changed div by article
         for card in page_cards:
             tweet = get_data(card, save_images, save_images_dir)
             if tweet:
@@ -349,12 +349,12 @@ def get_users_follow(users, headless, env, follow=None, verbose=1, wait=2, limit
         while scrolling and not is_limit:
             # get the card of following or followers
             # this is the primaryColumn attribute that contains both followings and followers
-            primaryColumn = driver.find_element_by_xpath('//div[contains(@data-testid,"primaryColumn")]')
+            primaryColumn = driver.find_element('xpath', '//div[contains(@data-testid,"primaryColumn")]')
             # extract only the Usercell
-            page_cards = primaryColumn.find_elements_by_xpath('//div[contains(@data-testid,"UserCell")]')
+            page_cards = primaryColumn.find_elements('xpath', '//div[contains(@data-testid,"UserCell")]')
             for card in page_cards:
                 # get the following or followers element
-                element = card.find_element_by_xpath('.//div[1]/div[1]/div[1]//a[1]')
+                element = card.find_element('xpath', './/div[1]/div[1]/div[1]//a[1]')
                 follow_elem = element.get_attribute('href')
                 # append to the list
                 follow_id = str(follow_elem)
@@ -402,7 +402,7 @@ def check_exists_by_link_text(text, driver):
 def check_exists_by_xpath(xpath, driver):
     timeout = 3
     try:
-        driver.find_element_by_xpath(xpath)
+        driver.find_element('xpath', xpath)
     except NoSuchElementException:
         return False
     return True