Schema#
lit.sdk.data.schema
#
This module provides classes and functions for managing schemas.
Schema
#
Represents the configuration for a model and contains information required for compiling the model.
name = name
instance-attribute
#
The name of the schema.
path = Path('/data') / self.team / 'schemas' / f'{self.name}.json'
instance-attribute
#
The path to the JSON representation of the schema on disk.
team = team
instance-attribute
#
The name of the team.
__init__(team, name)
#
Initializes a new instance of Schema.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `team` | `str` | The team which owns the Schema. | required |
| `name` | `str` | The name of the Schema. | required |
build()
#
Starts a build process in a new screen session.
create(team, name, *, dataset=None, features=None, from_schema=None, paths=None, resolution=None, num_chunks=None, hours_filter=None, split=None, split_date=None, split_percent=None, precache_count=None, seed=None, tags=None, project=None)
classmethod
#
Create a new schema with the given parameters.
This provides a programmatic way to create schemas, either from scratch or by cloning an existing schema and overriding specific parameters.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `team` | `str` | The team which will own the schema. | required |
| `name` | `str` | The name for the new schema. | required |
| `dataset` | `Optional[str]` | Dataset name from raw/ folder (e.g., 'aapl'). Required unless using from_schema. | `None` |
| `features` | `Optional[list[str]]` | List of feature names from features/_index.json. Required unless using from_schema. | `None` |
| `from_schema` | `Optional[str]` | Clone from an existing schema. Only override the params you specify. | `None` |
| `paths` | `Optional[list[str]]` | Explicit list of data file paths. If omitted, auto-discovers from dataset folder. | `None` |
| `resolution` | `Optional[int]` | Data resolution in minutes (default: 1). | `None` |
| `num_chunks` | `Optional[int]` | Number of chunks for parallel processing (default: 80). Fewer chunks mean larger chunks, which gives better parallelism for fast operations. | `None` |
| `hours_filter` | `Optional[str]` | Filter by trading hours: 'all', 'market', or 'extended' (default: 'all'). | `None` |
| `split` | `Optional[str]` | Split method: 'date' or 'percent' (default: 'date'). | `None` |
| `split_date` | `Optional[str]` | Split date for train/test in ISO format (e.g., '2018-01-01T06:00:00.000Z'). | `None` |
| `split_percent` | `Optional[int]` | Percentage of data for test set (default: 20). | `None` |
| `precache_count` | `Optional[int]` | Number of pre-shuffled dataset variants to create (default: 1). | `None` |
| `seed` | `Optional[int]` | Random seed for shuffle reproducibility (default: 0). | `None` |
| `tags` | `Optional[list[str]]` | List of tags for organization. | `None` |
| `project` | `Optional[str]` | Project name for grouping related schemas. | `None` |
Returns:
| Name | Type | Description |
|---|---|---|
| `Schema` | `Schema` | The newly created schema object. |
Raises:
| Type | Description |
|---|---|
| `LitDataError` | If schema creation fails. |
Examples:
```python
>>> # Clone and modify an existing schema
>>> schema = Schema.create(
...     team='contoso',
...     name='schema_sanity_test',
...     from_schema='schema_hourly_volatility',
...     features=['100_1_minute_bars', '100_1_minute_atr', 'new_label', 'per_minute'],
...     tags=['sanity-test', 'volatility']
... )
>>> # Create from scratch
>>> schema = Schema.create(
...     team='contoso',
...     name='my_new_schema',
...     dataset='aapl',
...     features=['100_1_minute_bars', '100_1_minute_atr', 'atr_up_next_hour', 'per_minute'],
...     split_date='2020-01-01T00:00:00.000Z',
...     num_chunks=40,
...     tags=['experiment', 'v1'],
...     project='volatility'
... )
```
remove()
#
Permanently removes a schema.