forked from magda-io/magda-config
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
565 lines (468 loc) · 22.2 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
## Global settings affect all services.
## Every key in this section must stay indented under `global:`. A commented-out
## key is written at the indentation it needs once its leading `# ` is removed.
global:
  ## Whether to use the combined db - if this is true all the databases used
  ## by magda will be consolidated into a single pod. If it's false, they'll
  ## all run in separate pods unless some managed postgres solution (e.g.
  ## useCloudSql) is specified elsewhere.
  ## Running in separate pods scales better, but requires more resources.
  ## Will be overwritten to `true` if deployed through `terraform`
  # useCombinedDb: true

  ## Whether to use the Google Cloud SQL managed postgres solution. Only set
  ## this to true if you already have cloud sql set up and be sure to look at
  ## settings for the cloud-sql-proxy service below.
  ## Will be overwritten to `false` if deployed through `terraform`
  # useCloudSql: false

  ## The external URL of the installation, e.g. "https://search.data.gov.au".
  ## This is used in a number of places, including linking back to the site
  ## from emails, creating an absolute sitemap URL for robots.txt, etc.
  externalUrl: https://example.com

  ## How to handle rolling updates during a deployment. If set to 0, kubernetes
  ## will happily take down pods while it replaces them (useful for dev setups).
  ## If set to 1, kubernetes will make sure one pod is available to take requests
  ## during upgrades. 0 by default.
  # rollingUpdate:
    # maxUnavailable: 0

  ## Settings section for openfaas
  # openfaas:
    ## turn off auth over openfaas gateway for ease of debugging
    # allowAdminOnly: false

  ## NOTE(review): with every child commented out, `image:` parses as null, and
  ## Helm treats a null override as a request to drop the chart default.
  ## Confirm that is intended; otherwise comment out `image:` too or use `{}`.
  image:
    ## What tag of the magda images to get. Look at
    ## https://github.com/magda-io/magda/releases/latest for versions.
    ## By default this is the same version as the helm chart.
    # tag:

    ## The docker repository to get the images from - defaults to the official
    ## data61 docker hub repo
    # repository: data61

    ## The imagePullPolicy to use for images - generally unless you're actively
    ## trying to track development this should be "IfNotPresent", otherwise if
    ## you are it can be "Always". Defaults to "IfNotPresent"
    # pullPolicy: IfNotPresent

    ## Kubernetes secret to use if you're fetching images from a private repository
    # imagePullSecret: ""

  ## Specify the shared common settings for all connectors
  # connectors:
    ## Whether to run an initial job to crawl all these on install
    # includeInitialJobs: false
    ## Whether to run recurring jobs on the schedule.
    # includeCronJobs: true
    ## Set default image setting for all connectors.
    ## You may explicitly set all connectors to use images from docker hub
    ## while loading magda-core images from a private docker registry
    # image:
      # repository: docker.io/data61
      # tag: 0.0.57-0
      # pullPolicy: IfNotPresent
      # imagePullSecret: false

  ## Specify the shared common settings for all minions
  # minions:
    ## Set default image setting for all minions.
    ## You may explicitly set all minions to use images from docker hub
    ## while loading magda-core images from a private docker registry
    # image:
      # repository: docker.io/data61
      # tag: 0.0.57-0
      # pullPolicy: IfNotPresent
      # imagePullSecret: false

  ## Whether to expose the node ports of the pods in the cluster - useful for
  ## development, because it allows you to connect directly to pods without using
  ## kubectl proxy
  # exposeNodePorts: false

  ## Whether to enable kubernetes liveness probes - these continually hit pods in
  ## order to see whether they're still alive and restart them upon too many failures.
  ## This is essential for prod, but tends to create a lot of overhead in development
  # enableLivenessProbes: false

  ## The admin to use as the root admin user in the database. Defaults to the very first
  ## uuidv4
  # defaultAdminUserId: "00000000-0000-4000-8000-000000000000"

  ## Default log level to use across the project. Currently this only affects scala-based
  ## services
  # logLevel: INFO

  ## Whether to disable database authentication - useful in development, a very very bad
  ## idea in production
  # noDbAuth: false

  ## Whether to give the pods priority classes - these ensure that essential pods like
  ## ElasticSearch, Databases and the Gateway are scheduled ahead of less essential pods
  ## like minions, but tend to cause problems if you're running Magda over multiple
  ## namespaces. Make sure that the priorities tag is true if you turn this on.
  # enablePriorityClass: true

  ## Whether to enable multi-tenancy (pre-alpha)
  # enableMultiTenants: false
## Tags allow for services to be turned on and off on the fly... e.g. if you don't need
## to send emails, turn the correspondence-api off.
## Every tag must stay indented under `tags:`; at column 0 the tag names would
## collide with the per-service configuration keys of the same name.
tags:
  ## "all" is a shortcut which turns on all services except minions.
  all: false

  ## General services. Note that if you are using combined-db, you need to set both
  ## combined-db and the databases that you are using (e.g. authorization-db) to true.
  ## You can find explanations of what each of these are below
  admin-api: true
  apidocs-server: true
  authorization-api: true
  authorization-db: true
  ## Will be overwritten to `false` if deployed through `terraform`
  # cloud-sql-proxy: true
  combined-db: true
  content-db: true
  content-api: true
  correspondence-api: false  # Disabled because it needs an SMTP server to run
  elasticsearch: true
  gateway: true
  indexer: true
  preview-map: true
  registry-api: true
  registry-db: true
  search-api: true
  session-db: true
  web-server: true

  ## Whether to use an ingress, which is necessary for HTTPS and using the Google CDN with
  ## Google Kubernetes Engine. By default this is off, and access is provided to the cluster
  ## through a Kubernetes LoadBalancer service
  ## Will be overwritten to `false` if deployed through `terraform`
  # ingress: false

  ## Priority classes - see explanation under "enablePriorityClass" above
  priorities: true

  ## Minions, which listen to changes in the registry and add extra metadata
  minion-broken-link: true
  minion-linked-data-rating: true
  minion-visualization: true
  minion-format: true

  ## Tenant api / db (pre-alpha)
  tenant-api: false
  tenant-db: false

  ## Open Policy Agent is used to determine what's authorised.
  opa: true

  ## storage-api is used to store dataset data files
  storage-api: true

  ## openfaas is used to manage & run serverless workload
  openfaas: true

  ## These are the external jobs that pull data in from other data portals.
  ## By default, all connectors are turned on.
  ## Here we turn off all connectors for the initial deployment
  connectors: false

  ## You may want to turn DGA connector on after the initial deployment to generate some data
  # connector-dga: true
## This is the config for the DGA connector. It only takes effect if the connector is
## turned on with the `connector-dga` tag above.
## More config examples can be found [here](https://github.com/magda-io/magda/blob/master/deploy/helm/magda/values.yaml#L26)
## A commented-out key is written at the indentation it needs once its leading
## `# ` is removed.
connector-dga:
  ## Whether to run an initial job to crawl all these on install
  # includeInitialJobs: false

  ## Whether to run recurring jobs on the schedule specified per connector.
  # includeCronJobs: true

  ## They can also have their resource requirements customised individually as per
  ## https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ like so:
  # resources:
    # requests:
      # cpu: 0m
      # memory: 0Mi
    # limits:
      # cpu: 0m
      # memory: 0Mi

  ## You can also set image details (except name) for all connectors at this level - this will override
  ## what's in .global but be overridden by what's in the individual connector config.
  # image:
    # tag: vx.x.x
    # pullPolicy: IfNotPresent
    # imagePullSecret: ""

  config:
    ## Unique id to identify this connector and records that are harvested from it
    id: dga
    ## Friendly readable name
    name: "data.gov.au"
    ## The base URL of the place to source data from
    sourceUrl: "https://data.gov.au/"
    ## When crawling through from beginning to end, how big should the individual requests be in records?
    pageSize: 1000
    ## Crontab schedule for how often this should happen.
    schedule: "0 * * * *"
    ## CKAN-specific config: what harvest sources to ignore. * will ignore everything that's been harvested
    ## from another portal
    ignoreHarvestSources: ["*"]
    ## CKAN-only: Only get datasets that match this organisation name - you can only have one due to limitations
    ## in the CKAN API, if you need multiple organisations make multiple connectors.
    # allowedOrganisationName: "doee"
    ## Extra data that will be added to the 'source' aspect
    # extras:
      # a: 122
      # b:
        # - 1
    ## Extra data that will be merged into the aspects that are received from the connector
    # presetRecordAspects:
      # - id: "dcat-dataset-strings"
        # recordType: "dataset"
        # data:
          # creation:
            # isOpenData: true
# All core components are configurable under magda-core instead
# connectors & minions are not core components anymore
# Every service section below must stay indented under `magda-core:`. A
# commented-out key is written at the indentation it needs once its leading
# `# ` is removed.
magda-core:
  ## Settings for the ingress. You need one of these if you want to do SSL or use the Google CDN on GKE
  ## Otherwise using the built-in Kubernetes LoadBalancer works ok.
  # ingress:
    ## The hostname of the site
    # hostname: example.com
    ## The name of the external ip to use - used when using the GCE ingress type
    # ipName:
    ## The class of the ingress - if running on GKE, gce will automatically work, otherwise you
    ## can use "nginx" which requires extra setup
    # ingressClass: gce
    ## Whether to enable TLS - requires a TLS secret to be set:
    ## https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/
    # enableTls: false
    ## Name of the tls secret, defaults to "magda-cert-tls"
    # tlsSecretName: "magda-cert-tls"
    ## Domains to attempt to obtain an SSL certificate for from let's encrypt, if using certmanager
    ## (requires extra setup)
    # domains:
      # - example.com

  ## Below are individual settings for services. All services can be customised as follows:
  # servicename:
    ## The number of replicas to run
    # replicas: 1
    ## The service's image as per the global options under "global.image" like so:
    # image:
      # tag: vx.x.x
      # pullPolicy: IfNotPresent
      # imagePullSecret: ""
    ## They can also have their resource requirements customised individually as per
    ## https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ like so:
    # resources:
      # requests:
        # cpu: 0m
        # memory: 0Mi
      # limits:
        # cpu: 0m
        # memory: 0Mi

  ## Furthermore, all the stateful sets (elasticsearch.data and anything ending in -db) can
  ## have the volume that they claim customised like so:
  # databasename-db:
    # data:
      ## The amount of storage to allocate - be generous because it's often not easy to resize
      # storage: 200Gi
      ## The storage class - needs to correspond to a storage class configured in kubernetes
      # storageClass: "ssd"

  ## Database pods (anything ending in -db) can also be configured with streaming backups via
  ## WAL-E (https://github.com/wal-e/wal-e)
  # databasename-db:
    # waleBackup:
      ## The method to use for backup - either set to WAL (write-ahead-log) or leave blank
      ## for no backups
      # method: WAL
      ## The time (in the server's timezone) to do a full backup of the system.
      ## Keep the quotes: an unquoted 03:00 can be read as a base-60 number.
      # executionTime: "03:00"
      ## Whether to only read backups but not write them
      # readOnly: false
      ## The WAL-E recovery mode to use - generally leave this out, or set to "immediate" for
      ## the quickest possible recovery at the expense of some data loss
      # recoveryMode:
      ## Settings for pushing backups to Google Storage
      ## The prefix of the place to push the backups
      # gsPrefix: "gs://magda-postgres-backups-asia/dev"
      ## Credentials for authenticating
      # googleApplicationCreds:
        ## The secret to look for the credentials json file in
        # secretName: storage-account-credentials
        ## The filename of the credentials json mounted in the secret
        # fileName: TerriaJS-5e042b649f8a.json

  ## Settings for the apidocs server, which serves the documentation for the api from
  ## /api/v0/apidocs/index.html
  # apidocs-server:

  ## Settings for the authorization-api, which determines what each user's permissions are:
  # authorization-api:

  ## Settings for the authorization-db, which serves the authorization-api
  # authorization-db:

  ## A pod for proxying to Google Cloud SQL, the GCE managed database service.
  # cloud-sql-proxy:
    ## The cloud sql instance to connect to
    # instanceConnectionName: project:region:instanceid

  ## A pod that combines all the databases if global.useCombinedDb is set to true
  # combined-db:

  ## A pod that controls the content in magda's UI
  # content-api:
    ## Override the SCSS variables used by Magda's SCSS files. Look at the "_variables.scss"
    ## file in the magda code for details. Most customisation can be performed by setting
    ## magda-color-primary and magda-color-secondary
    # scssVars: '{"magda-color-primary": "#272727", "magda-color-secondary": "#726a5c"}'

  ## The database behind content-api
  # content-db:

  ## A service that renders emails and forwards them to an SMTP server
  correspondence-api:
    ## The default recipient to email when there's no other choice - e.g. when asking a
    ## question about a dataset for which there's no organisational email
    ## NOTE(review): the value below looks like web-page e-mail obfuscation residue rather
    ## than a real mailbox - set a real address before enabling the correspondence-api tag.
    defaultRecipient: "[email protected]"
    ## The hostname of the SMTP server to use - the password should be in a secret
    smtpHostname: "example.com"
    ## The SMTP port to use - beware, default SMTP ports are blocked on most cloud
    ## providers
    smtpPort: 80

  ## The elasticsearch setup - this can consist of up to three different kinds of pods -
  ## data nodes, master nodes and client nodes.
  # elasticsearch:
    ## Whether to use client and master nodes, or just use data nodes.
    # production: false
    ## All three kinds of nodes can be customised with these settings like so:
    # nodetype:
      ## The java heap size - this should be half the total memory request
      # heapSize: 3000m
      ## Elasticsearch plugins to install
      # pluginsInstall: "repository-gcs"
    ## Settings for the data nodes - this is a statefulset that actually holds the data,
    ## and is the only kind of node that's strictly necessary. You can have multiple
    ## replicas of these, they will discover each other and balance shards and replicas
    ## between them.
    # data:
    ## Settings for the client nodes - this accepts HTTP connections from search-api and
    ## indexer and forwards them to the data nodes.
    # client:
    ## Settings for the master nodes - these keep track of the data nodes as they go up
    ## and down and keep track of what the current state of the data should be.
    # master:

  ## The gateway accepts incoming connections and directs them to the appropriate api pod,
  ## or the web server pod
  gateway:
    service:
      ## The type of kubernetes service to use for the gateway - set to LoadBalancer to
      ## expose an external IP, or set to "NodePort" if using an ingress
      ## Will be overwritten to `NodePort` if deployed through `terraform`
      type: LoadBalancer
      ## If using a LoadBalancer, the IP to try to use. Leave unset to get an automatic one
      ## NOTE(review): the nesting of this commented key could not be recovered - confirm
      ## whether the chart reads it from `gateway.service` or from `gateway` before enabling.
      # loadBalancerIP:

    ## If replacing a CKAN installation with Magda, this will redirect CKAN paths without
    ## direct magda equivalents (e.g. the API) to a ckan installation at this domain and path
    enableCkanRedirection: false
    # ckanRedirectionDomain: "example.com"
    # ckanRedirectionPath: "/data"

    ## Configure the autoscaler
    # autoscaler:
      ## whether to enable autoscaling
      # enabled: false
      ## The minimum number of replicas to have
      # minReplicas: 2
      ## The maximum number of replicas to have
      # maxReplicas: 4

    ## Client ids to use for OAuth authentication
    auth:
      ## Client id for facebook
      # facebookClientId: ""
      ## Client id for google
      # googleClientId: ""
      ## If you want to use ckan as a way to login, put the base URL here
      ckanAuthenticationUrl: https://data.gov.au/data
      ## If you want to use the Australian Department of Industry's VANguard service, put the details here:
      # vanguardWsFedIdpUrl: https://authentication.business.gov.au/fas/v2/wsfed12/authenticate
      # vanguardWsFedRealm: your-realm-here

    ## Configures the Content Security Policy that will be set on all responses -
    ## the contents will be converted to JSON and passed into https://github.com/helmetjs/csp
    ## NOTE(review): inside double quotes `''` is two literal apostrophes, so these values
    ## are ''self'' etc. rather than 'self'. The doubling looks deliberate (presumably the
    ## chart embeds the JSON in a single-quoted string, where '' collapses to ') - confirm
    ## against the gateway chart template before "fixing" it.
    csp:
      directives:
        scriptSrc:
          - "''self''"
          - "''unsafe-inline''"
          - browser-update.org
        objectSrc:
          - "''none''"

    ## Configures helmet.js - config will be converted to JSON and passed to
    ## https://github.com/helmetjs/helmet
    # helmet:

    ## Configures cross-origin resource sharing - will be converted to JSON and passed
    ## to https://www.npmjs.com/package/cors#configuring-cors
    # cors:

    ## Enables authorization, include a list of authorization options at /auth/providers
    ## and a simple form at /auth that allows you to test login and logout functionality
    ## without needing a web pod
    enableAuthEndpoint: true
    ## Redirect HTTP urls to HTTPs urls
    enableHttpsRedirection: false
    ## Turn on HTTP basic authentication - this uses secrets created by the create-secret script
    enableWebAccessControl: false

  ## Configures the service that puts datasets and organisations into elasticsearch
  # indexer:
    # elasticsearch:
      ## How many shards and replicas to use when creating a new index. Generally, more
      ## shards mean that the index will be split among more data nodes, and more replicas
      ## allows elasticsearch to copy those shards among different nodes.
      ## In general you want a number of shards equal to the lowest number of data nodes
      ## possible, and a number of replicas equal to the highest number.
      # shards: 1
      # replicas: 0

  ## Configures the TerriaJS server responsible for serving preview maps.
  # preview-map:
    # image:
      # repository: docker.io/data61
      # tag: 0.0.57-0
      # pullPolicy: IfNotPresent
      # imagePullSecret: false

  ## Configures the registry, which holds information about datasets, distributions,
  ## organisations etc and distributes it.
  # registry-api:
    ## Log level to use
    # logLevel: INFO
    ## Stops checking for authorization on methods that change the state of the registry.
    ## Useful when running locally - DO NOT TURN ON IN PRODUCTION
    # skipAuthorization: false
    ## Configures the liveness probe that checks if the registry is still alive. This
    ## is configurable separately because currently the registry api has a tendency to
    ## slow down over time, and needs to be restarted when this occurs.
    # livenessProbe:
      # timeoutSeconds: 10
    ## The registry api is able to be split into two kinds of deployments: full, which does everything
    ## but can only ever run one pod at a time, and read-only, which can only do read-only requests but
    ## is stateless and can be horizontally scaled
    # deployments:
      # full:
        ## Resources for the full deployment, as per other resource declarations
        # resources:
      # readOnly:
        ## Whether to enable the read-only deployment - if not all traffic will go to the full deployment,
        ## otherwise traffic through /api/v0/registry will come here and only traffic to /api/v0/registry-auth
        ## will go to the full deployment
        # enable: true
        ## Number of replicas
        # replicas: 2
        ## Resources for the read-only deployment, as per other resource declarations
        # resources:

  ## Configures the registry database, if a separate one is being used.
  # registry-db:

  ## Configures the API that connects to ElasticSearch to perform searches.
  # search-api:

  ## Configures the database used to store session information by passport js, if
  ## one is being used.
  # session-db:

  ## The server used for serving the web application to users.
  web-server:
    ## Autoscaler settings - the min and max number of pods to use
    # autoscaler:
      # minReplicas: 2
      # maxReplicas: 4

    ## Whether to disable the ability to log in in the UI - set this to true if
    ## you don't want to use any account features.
    # disableAuthenticationFeatures: true

    ## If you're replacing an existing system, you can set its url here to display a
    ## banner allowing people to go back if they don't like the new system.
    # fallbackUrl: "https://data.gov.au"

    ## Google Analytics Ids to collect analytics data from the frontend
    # gapiIds:
      # - "ABC123"

    ## The search score after which to show a "suggest a dataset" form
    # datasetSearchSuggestionScoreThreshold: 65

    ## The id of the organization to display by default in the Add Dataset flow.
    # defaultOrganizationId: "abc"

    featureFlags:
      cataloguing: true
      previewAddDataset: true

    vocabularyApiEndpoints:
      - "https://vocabs.ands.org.au/repository/api/lda/abares/australian-land-use-and-management-classification/version-8/concept.json"
      - "https://vocabs.ands.org.au/repository/api/lda/neii/australian-landscape-water-balance/version-1/concept.json"
      - "https://vocabs.ands.org.au/repository/api/lda/ands-nc/controlled-vocabulary-for-resource-type-genres/version-1-1/concept.json"

  ## Settings for the admin API, which allows for changes to be made to the magda
  ## installation at runtime... e.g. creating new connector jobs
  # admin-api:
## Settings for the broken link minion, which crawls URLs of distributions in order
## to determine whether they're still available.
# minion-broken-link:
## Settings for the format minion, which tries to determine the true format of a
## distribution based on a number of criteria.
# minion-format:
## Settings for the linked data rating minion, which determines a rating for a dataset
## based off the availability, license and format of a dataset
# minion-linked-data-rating:
## Settings for the visualization minion, which looks at tabular data in order to
## determine which columns can be charted.
# minion-visualization: