Makefile

.PHONY: clean
# Remove all processing artifacts, build files and cache files.
clean: clean-data
	rm -rf .ruff_cache/ .pytest_cache/
	find . -type f -name '*.zip' -exec rm {} +
	find . -type d -name '__pycache__' -exec rm -rf {} +

.PHONY: clean-data
# Remove all crawled data.
clean-data:
	find . -type d -name '.data' -exec rm -rf {} +

.PHONY: ci
# Full continuous integration pipeline.
ci: lint test

.PHONY: lint
# Verify formatting and lint rules for all Python files.
lint:
	black --diff --check src/ tests/ webcrawl/ -q
	ruff check .

.PHONY: format
# Reformat all Python files and fix auto-fixable lint errors.
format:
	black src/ tests/ webcrawl/ -q
	ruff check --fix .

.PHONY: test
# Run all project test suites.
test:
	pytest tests/
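
# For a quicker loop, pytest can also target a single module or test
# (the path and expression below are placeholders):
#   pytest tests/test_spiders.py -k "parse"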

.PHONY: crawl
# Run the web crawlers in the background with their default configs. Run this
# target from inside the virtual environment; it also works over an SSH
# connection to a remote server. To record logs to a file, set its path in the
# 'SCRAPY_LOG_TARGET' environment variable.
crawl:
	@if [ -n "$$SCRAPY_LOG_TARGET" ]; then \
		LOG_TARGET=$$SCRAPY_LOG_TARGET; \
	else \
		LOG_TARGET="/dev/null"; \
	fi; \
	cd webcrawl/ && { \
		nohup scrapy crawl games -o "../.data/games.jl" > "$$LOG_TARGET" 2>&1 & \
		nohup scrapy crawl reviews -o "../.data/reviews.jl" > "$$LOG_TARGET" 2>&1 & \
	}
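
# Example invocation of 'crawl' (the log path below is illustrative):
#   SCRAPY_LOG_TARGET=.data/crawl.log make crawl
#   pgrep -af scrapy    # confirm both spiders are running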

.PHONY: zip-crawler
# Compress the web crawler scripts into a '*.zip' file for execution in a virtual environment.
zip-crawler:
	@which zip >/dev/null || (echo "'zip' utility not found" && exit 1)
	zip -r -9 webcrawlers.zip webcrawl/ -x "steampowered/__pycache__/*" -x "steampowered/spiders/__pycache__/*"
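
# One possible way to ship the archive to a remote machine (host and paths are
# placeholders):
#   scp webcrawlers.zip user@remote:~/crawlers/
#   ssh user@remote 'cd crawlers && unzip -o webcrawlers.zip'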

.PHONY: requirements
# Export Poetry dependencies into separate `requirements.txt` files, one for each environment.
requirements:
	poetry export -f requirements.txt --only webcrawl --without-hashes --without-urls --output webcrawl/requirements.txt
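
# The exported file can then be installed without Poetry, for example inside
# the crawler's virtual environment:
#   python -m pip install -r webcrawl/requirements.txt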