Configuration#

This section auto-documents the repository application configuration.

pydantic settings app.core.config.Settings[source]#

Settings model for API.

Config:
  • extra: str = ignore

  • env_file: str = .env

  • env_file_encoding: str = utf-8

Fields:
field allowed_import_domains: list[str] = ['blob.core.windows.net'][source]#

Allowed domain suffixes for import storage URLs. URLs whose hostname does not match are rejected with 403. An empty list disables the check.

field app_name: str [Required][source]#
field auth_provider: Literal['azure', 'keycloak'] = 'azure'[source]#

The authentication provider to use: ‘azure’ or ‘keycloak’.

field azure_application_id: str [Required][source]#
field azure_blob_config: AzureBlobConfig | None = None[source]#
field azure_login_url: str = 'https://login.microsoftonline.com'[source]#
field bypass_auth: bool | None = None[source]#

Override auth bypass behavior. When None (default), auth is bypassed only when running locally (ENV=local/test). Set to False to enforce auth even locally.

field cli_client_id: str | None = None[source]#
field cors_allow_origins: list[str] [Optional][source]#

List of allowed origins for CORS.

field db_config: DatabaseConfig [Required][source]#
field dedup_scoring: DedupCandidateScoringConfig = DedupCandidateScoringConfig(max_author_clauses=25, min_author_token_length=2)[source]#
field default_download_file_chunk_size: Literal[1] = 1[source]#

Number of records to process in a single file chunk when downloading. Not configurable or used; it just represents that we stream line-by-line at this point.

field default_es_indexing_chunk_size: int = 1000[source]#

Number of records to process in a single chunk when indexing to Elasticsearch.

field default_es_percolation_chunk_size: int = 1000[source]#

Number of records to process in a single chunk when percolating to Elasticsearch.

field default_pending_enhancement_lease_duration: timedelta = datetime.timedelta(seconds=600)[source]#

The default duration to lease pending enhancements for, provided in ISO 8601 duration format, e.g. ‘PT10M’.

field default_upload_file_chunk_size: int = 1[source]#

Number of records to process in a single file chunk when uploading.

field env: Environment = Environment.PRODUCTION[source]#

The environment the app is running in.

field es_config: ESConfig [Required][source]#
field es_indexing_chunk_size_override: dict[ESIndexingOperation, int] [Optional][source]#

Override the default Elasticsearch indexing chunk size.

field es_percolation_chunk_size_override: dict[ESPercolationOperation, int] [Optional][source]#

Override the default Elasticsearch percolation chunk size.

field es_reference_repair_chunk_size: int = 1000[source]#

Number of reference records to process in a single distributed task when repairing or rebuilding the reference index in Elasticsearch. Be wary that if increased too far, then the repair_reference_index_for_chunk task will require long_running=True and subsequent lock management.

field feature_flags: FeatureFlags = FeatureFlags(enable_percolation=True)[source]#
field import_reference_retry_count: int = 3[source]#

Number of times to retry importing a reference before marking it as failed. We only retry on errors we are confident can be resolved, e.g. network issues or inconsistent database state being loaded in parallel.

field keycloak_client_id: str | None = None[source]#

The Keycloak client ID for token validation.

field keycloak_issuer_url: str | None = None[source]#

The issuer URL for token validation. Defaults to keycloak_url if not set. Useful when the token issuer differs from the internal Keycloak URL (e.g., in Docker where tokens are issued with localhost but JWKS is fetched via internal network).

field keycloak_realm: str = 'destiny'[source]#

The Keycloak realm name.

field keycloak_url: str | None = None[source]#

The base URL of the Keycloak server (used for JWKS fetching).

field log_level: LogLevel = LogLevel.INFO[source]#

The log level for the application. This applies to both opentelemetry and standard logging.

field max_lookup_reference_query_length: int = 100[source]#

Maximum number of identifiers to allow in a single reference lookup query.

field max_pending_enhancements_batch_size: int = 10000[source]#

Maximum number of pending enhancements to return in a single batch.

field message_broker_namespace: str | None = None[source]#
field message_broker_queue_name: str = 'taskiq'[source]#
field message_broker_url: str | None = None[source]#
field message_lock_renewal_duration: int = 10800[source]#

Duration in seconds to keep renewing message locks. Should be longer than expected processing time.

field minio_config: MinioConfig | None = None[source]#
field otel_config: OTelConfig | None = None[source]#
field otel_enabled: bool = False[source]#
field presigned_url_expiry_seconds: int = 3600[source]#

The number of seconds a signed URL is valid for.

field project_root: Path = PosixPath('/home/runner/work/destiny-repository/destiny-repository')[source]#
field tests_use_rabbitmq: bool = False[source]#

Whether to use RabbitMQ for tests. Only used in test environment. If false, uses in-memory broker.

field toml: TOML = TOML(toml_path=PosixPath('/home/runner/work/destiny-repository/destiny-repository'))[source]#
field trusted_unique_identifier_types: set[ExternalIdentifierType] [Optional][source]#

Set of external identifier types that are considered trusted unique identifiers for references. These are used to shortcut deduplication. If empty, shortcutting is essentially feature-flagged off.

field upload_file_chunk_size_override: dict[UploadFile, int] [Optional][source]#

Override the default upload file chunk size.

property default_blob_container: str[source]#

Return the default blob container.

property default_blob_location: str[source]#

Return the default blob location.

property running_locally: bool[source]#

Return True if the app is running locally.

property should_bypass_auth: bool[source]#

Return True if auth should be bypassed.

Auth can only be bypassed when running locally (ENV=local/test).

property trace_repr: str[source]#

Get a string representation of the config for tracing.