Skip to content

Commit

Permalink
fixed bad encoding in international languages
Browse files (browse the repository at this point in the history)
  • Loading branch information
Allexio committed Apr 2, 2021
1 parent a36590f commit ca04a0c
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 5 deletions.
5 changes: 4 additions & 1 deletion src/parser_interests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ def parse_interests(user_data: dict) -> dict:
user_interests = utils.json_file_converter(interests_path)["topics"]
interest_categories = utils.json_file_converter(interest_categories_path)

for interest in user_interests:
interest = interest.encode('latin_1').decode('utf8')

interest_category_count = {}
interest_category_total = {}

Expand Down Expand Up @@ -58,6 +61,6 @@ def html_interest_list_builder(user_interests: list, interest_categories: dict)
corresponding_category = category
elif interest in politicians:
corresponding_category = "Political Orientation"
html_interests += start_html + interest + mid_html + corresponding_category + end_html
html_interests += start_html + interest.encode('latin_1').decode('utf8') + mid_html + corresponding_category + end_html

return html_interests
3 changes: 2 additions & 1 deletion src/parser_off_facebook_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ def parse_off_facebook_activities(user_data: dict) -> dict:
off_facebook_activities_path = getcwd() + "/temp/ads_and_businesses/your_off-facebook_activity.json"
off_facebook_activity_list = utils.json_file_converter(off_facebook_activities_path)["off_facebook_activity"]


number_of_websites_tracking_the_user = 0
number_of_applications_tracking_the_user = 0

Expand All @@ -16,7 +17,7 @@ def parse_off_facebook_activities(user_data: dict) -> dict:
number_of_websites_tracking_the_user += 1
else:
number_of_applications_tracking_the_user += 1
events_per_tracker[tracking_entity["name"]] = len(tracking_entity["events"])
events_per_tracker[tracking_entity["name"].encode('latin_1').decode('utf8')] = len(tracking_entity["events"])

# sort dict of events per tracker by descending value order
events_per_tracker = {k: v for k, v in sorted(events_per_tracker.items(), reverse=True, key=lambda item: item[1])}
Expand Down
3 changes: 3 additions & 0 deletions src/parser_peer_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def parse_peer_group(user_data: dict) -> dict:
return user_data
peer_group = peer_group_info["friend_peer_group"]


peer_group = peer_group.encode('latin_1').decode('utf8')

user_data["peer_group"] = peer_group

return user_data
2 changes: 1 addition & 1 deletion src/parser_profile_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def parse_user_info(user_data: dict) -> dict:
user_name = profile_data["name"]["full_name"]
join_year = utils.epoch_to_year(profile_data["registration_timestamp"])
if "relationship" in profile_data:
relationship_status = profile_data["relationship"]["status"]
relationship_status = profile_data["relationship"]["status"].encode('latin_1').decode('utf8')
if "partner" in profile_data["relationship"]:
relationship_status += " with " + profile_data["relationship"]["partner"]
relationship_timestamp = utils.epoch_to_year(profile_data["relationship"]["timestamp"])
Expand Down
3 changes: 1 addition & 2 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

def json_file_converter(file_path: str) -> dict:
""" Opens a json file and returns a corresponding python list or dictionary object """
with open(file_path, "r", encoding="utf8") as json_file:
with open(file_path, "r", encoding="latin1") as json_file:
json_str = json_file.read()
python_str = json_str.replace("true", "True").replace("false", "False").replace("null", "None")
python_dict = literal_eval(python_str)
Expand All @@ -14,7 +14,6 @@ def epoch_to_year(posix_time: int) -> str:
# just in case number provided is a string
posix_time = int(posix_time)
year = datetime.utcfromtimestamp(posix_time).strftime('%Y')
month = datetime.utcfromtimestamp(posix_time).strftime('%m')
return year

def epoch_to_year_and_month(posix_time: int) -> str:
Expand Down

0 comments on commit ca04a0c

Please sign in to comment.