Skip to content

Commit

Permalink
fix: update categories (#1049)
Browse files Browse the repository at this point in the history
Co-authored-by: Raphael Odini <[email protected]>
  • Loading branch information
raphael0202 and raphodn authored Nov 24, 2024
1 parent 478f1fd commit 5ff1c83
Show file tree
Hide file tree
Showing 152 changed files with 177 additions and 159 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
node_modules
yarn-error.log
dist
.vscode
.vscode
.venv
33 changes: 25 additions & 8 deletions data/categories/filter_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"en:pumpkins", # 9
"en:dried-mushrooms", # 7
"en:textured-vegetable-protein", # 2
"en:squash-pulp",
]

EXTRA_CHILDREN = [
Expand All @@ -46,6 +47,16 @@

EXCLUDE_LIST = ["Cooked", "Fresh", "Frozen", "Canned", "Prepacked"]

EXCLUDE_LIST_NODE_IDS = [
"en:vegetables-from-germany",
"en:vegetables-from-the-netherlands",
"en:spanish-vegetables",
"en:vegetables-from-spain",
"en:vegetables-from-italy",
"en:nut-macarons",
"en:red-kuri-pulp",
]

script_path = os.path.dirname(os.path.abspath(__file__))


Expand Down Expand Up @@ -179,7 +190,7 @@ def compare_new_categories_with_old_categories():
CATEGORIES_FULL, PARENT_CATEGORIES_ID
)
print(
"Filter with the following parent categories:",
"Filter on the following parent categories:",
[node.id for node in PARENT_CATEGORIES],
)

Expand All @@ -193,31 +204,37 @@ def compare_new_categories_with_old_categories():
categories_filtered.extend(
get_taxonomy_node_list_by_id_list(CATEGORIES_FULL, EXTRA_CHILDREN)
)
# Step 2c: keep only nodes starting with "en:"
# Step 2c: exclude
# - keep only nodes starting with "en:"
# - remove nodes in EXCLUDE_LIST_NODE_IDS
# - remove nodes containing some strings in EXCLUDE_LIST
categories_filtered = [
node for node in categories_filtered if node.id.startswith("en:")
node
for node in categories_filtered
if node.id.startswith("en:")
]
# Step 2d: remove nodes containing some strings
print("Additional filtering on:", EXCLUDE_LIST)
categories_filtered = [node for node in categories_filtered if node.id not in EXCLUDE_LIST_NODE_IDS]
categories_filtered = filter_node_list_by_exclude_string_list(
categories_filtered, EXCLUDE_LIST
)
print("Finished filtering:", len(categories_filtered))

# Deduplicate
# Step 3: deduplicate
categories_filtered_deduped = []
seen = set()
for category in categories_filtered:
if category.id not in seen:
categories_filtered_deduped.append(category)
seen.add(category.id)
print("Finished deduplicating")

# Step 3: transform to dict list
# Step 4: transform to dict list
categories_filtered_to_dict_list = [
{"id": node.id, **node.to_dict()} for node in categories_filtered_deduped
]
print("Categories remaining:", len(categories_filtered_to_dict_list))

# Step 4: write to files (1 per language)
# Step 5: write to files (1 per language)
write_categories_to_files(categories_filtered_to_dict_list, delete_parents=True)
print("Wrote to language files")

Expand Down
2 changes: 1 addition & 1 deletion src/data/categories/aa.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ach.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/af.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ak.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/am.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ar.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/as.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ast.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/az.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/be.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ber.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/bg.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/bm.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/bn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/bo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/br.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/bs.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ca.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ce.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/chr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/co.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/crs.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/cs.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/cv.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/cy.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/da.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/de.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/el.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/en.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/en_AU.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/en_GB.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/eo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/es.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/et.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/eu.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/fa.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/fi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/fil.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/fo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/fr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ga.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/gd.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/gl.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/gu.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ha.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/he.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/hi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/hr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ht.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/hu.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/hy.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/id.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ii.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/is.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/it.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/iu.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ja.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/jv.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ka.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/kab.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/kk.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/km.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/kmr_TR.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/kn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ko.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/kw.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ky.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/la.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/lb.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/lo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/lt.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/lv.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/mg.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/mi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ml.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/mn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/mr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ms.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/mt.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/my.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/nb.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ne.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/nl_BE.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/nl_NL.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/nn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/no.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/nr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/oc.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/or.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/pa.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/pl.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/pt_BR.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/pt_PT.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/qu.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/rm.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ro.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ru.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sa.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sat.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sc.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sco.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sd.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sg.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/si.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sk.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sl.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sma.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/so.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/son.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sq.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sr_CS.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sr_RS.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ss.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/st.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sv.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/sw.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ta.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/te.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tg.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/th.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ti.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tl.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tn.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tr.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ts.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tt.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tw.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ty.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/tzl.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ug.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/uk.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ur.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/uz.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/val.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/ve.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/vec.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/vi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/vls.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/wa.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/wo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/xh.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/yi.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/yo.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/zea.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/zh_CN.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/zh_HK.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/zh_TW.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/categories/zu.json

Large diffs are not rendered by default.

0 comments on commit 5ff1c83

Please sign in to comment.