Module materialize.data_ingest.data_type

Expand source code Browse git
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

import json
import random
import string
import uuid
from enum import Enum
from typing import Any

from pg8000.native import literal

from materialize.util import all_subclasses


class RecordSize(Enum):
    """Size class for generated values; the data types in this module pick wider value ranges and longer values from TINY up to LARGE."""

    TINY = 1
    SMALL = 2
    MEDIUM = 3
    LARGE = 4


class Backend(Enum):
    """Target system whose spelling of a type name the `name` methods in this module return."""

    AVRO = 1
    JSON = 2
    POSTGRES = 3


class DataType:
    """As supported by Avro: https://avro.apache.org/docs/1.11.1/specification/_print/

    Base class of the generated column types. All methods are static and raise
    NotImplementedError here; subclasses override the ones they support.
    """

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Generate a random value, should be possible for all types.

        `rng` is the only source of randomness, `record_size` selects how large
        the value may get, and `in_query` asks for a form that can be embedded
        in a SQL query instead of the plain value.
        """
        raise NotImplementedError

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Generate a value that corresponds to `num`, so that it will always be the same value for the same input `num`, but fits into the type. This doesn't make sense for a type like boolean."""
        raise NotImplementedError

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return the name of this type as spelled for `backend`."""
        raise NotImplementedError


class Boolean(DataType):
    """Boolean column type; `numeric_value` is not overridden here."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return True or False picked by `rng`; `record_size` and `in_query` are not used."""
        return rng.choice((True, False))

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "boolean" regardless of `backend`."""
        return "boolean"


class SmallInt(DataType):
    """Integer column type named "smallint" in Postgres."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        if record_size == RecordSize.TINY:
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            lower, upper = -127, 128
        elif record_size in (RecordSize.SMALL, RecordSize.MEDIUM, RecordSize.LARGE):
            lower, upper = -32768, 32767
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return the type name for `backend`; only Postgres gets "smallint"."""
        if backend == Backend.AVRO:
            return "int"  # no explicit support in AVRO
        if backend == Backend.JSON:
            return "integer"  # no explicit support in JSON
        return "smallint"


class Int(DataType):
    """Integer column type named "int" in Postgres and Avro."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        if record_size == RecordSize.TINY:
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            lower, upper = -127, 128
        elif record_size == RecordSize.SMALL:
            lower, upper = -32768, 32767
        elif record_size in (RecordSize.MEDIUM, RecordSize.LARGE):
            lower, upper = -2147483648, 2147483647
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "integer" for the JSON backend and "int" for every other backend."""
        return "integer" if backend == Backend.JSON else "int"


class Long(DataType):
    """Integer column type named "bigint" in Postgres and "long" in Avro."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        ranges = (
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            (RecordSize.TINY, (-127, 128)),
            (RecordSize.SMALL, (-32768, 32767)),
            (RecordSize.MEDIUM, (-2147483648, 2147483647)),
            (RecordSize.LARGE, (-9223372036854775808, 9223372036854775807)),
        )
        bounds = next(
            (limits for size, limits in ranges if record_size == size), None
        )
        if bounds is None:
            raise ValueError(f"Unexpected record size {record_size}")
        lower, upper = bounds

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "long" for Avro, "integer" for JSON and "bigint" otherwise."""
        if backend == Backend.AVRO:
            return "long"
        if backend == Backend.JSON:
            return "integer"
        return "bigint"


class Float(DataType):
    """Floating point column type named "float4" in Postgres."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random float whose magnitude is bounded by `record_size`.

        1.0 and 0.0 each get their own 1-in-10 draw first. Otherwise TINY
        yields `rng.random()` and the larger sizes draw uniformly from a range
        that is symmetric around zero. Raises ValueError for an unexpected
        `record_size`. `in_query` is not used.
        """
        if rng.randrange(10) == 0:
            return 1.0
        if rng.randrange(10) == 0:
            return 0.0

        if record_size == RecordSize.TINY:
            return rng.random()
        elif record_size == RecordSize.SMALL:
            return rng.uniform(-100, 100)
        elif record_size == RecordSize.MEDIUM:
            return rng.uniform(-1_000_000, 1_000_000)
        elif record_size == RecordSize.LARGE:
            # Same magnitude on both sides, like the smaller sizes
            return rng.uniform(-1_000_000_000, 1_000_000_000)
        else:
            raise ValueError(f"Unexpected record size {record_size}")

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "float" for Avro, "number" for JSON and "float4" otherwise."""
        if backend == Backend.AVRO:
            return "float"
        elif backend == Backend.JSON:
            return "number"
        else:
            return "float4"


class Double(Float):
    """Float variant that only changes the type name ("float8" in Postgres)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "double" for Avro, "number" for JSON and "float8" otherwise."""
        if backend == Backend.AVRO:
            return "double"
        if backend == Backend.JSON:
            return "number"
        return "float8"


class Text(DataType):
    """Text column type named "text" in Postgres and "string" elsewhere."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random string, occasionally one of a few special values.

        With a 1-in-10 draw a special value (keyword-like text, a number,
        non-ASCII characters) is returned. Otherwise TINY picks one of three
        fixed words and the larger sizes build an alphanumeric string of 3, 10
        or 100 characters. With `in_query` the result is passed through
        pg8000's `literal`. Raises ValueError for an unexpected `record_size`.
        """
        # Resolve the size before any draw so an unexpected value always raises
        if record_size == RecordSize.TINY:
            length = None  # TINY picks from a fixed word list instead
        elif record_size == RecordSize.SMALL:
            length = 3
        elif record_size == RecordSize.MEDIUM:
            length = 10
        elif record_size == RecordSize.LARGE:
            length = 100
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        result: Any
        if rng.randrange(10) == 0:
            # The special value is the final result; it must not be replaced
            # by a size-based string afterwards.
            result = rng.choice(
                [
                    "NULL",
                    "0.0",
                    "True",
                    # "",
                    "表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀",
                    rng.randint(-100, 100),
                ]
            )
        elif length is None:
            result = rng.choice(("foo", "bar", "baz"))
        else:
            # Fails: unterminated dollar-quoted string
            # chars = string.printable
            chars = string.ascii_letters + string.digits
            result = "".join(rng.choice(chars) for _ in range(length))

        return literal(str(result)) if in_query else str(result)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return "key<num>", wrapped in single quotes when `in_query` is set."""
        result = f"key{num}"
        return f"'{result}'" if in_query else str(result)

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "text" for Postgres and "string" for every other backend."""
        if backend == Backend.POSTGRES:
            return "text"
        else:
            return "string"


class Bytea(Text):
    """Text variant that only changes the type name ("bytea" in Postgres)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "bytes" for Avro, "string" for JSON and "bytea" otherwise."""
        if backend == Backend.AVRO:
            return "bytes"
        if backend == Backend.JSON:
            return "string"
        return "bytea"


class UUID(DataType):
    """UUID column type; named "uuid" for every backend."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return one of four fixed UUIDs or a random version-4 UUID.

        The random candidate is built from 128 bits of `rng` on every call.
        With `in_query` the value is quoted and cast to uuid. `record_size`
        is not used.
        """
        result = rng.choice(
            [
                "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
                "6f5eec33-a3c9-40b2-ae06-58f53aca6e7d",
                "00000000-0000-0000-0000-000000000000",
                "ffffffff-ffff-ffff-ffff-ffffffffffff",
                uuid.UUID(int=rng.getrandbits(128), version=4),
            ]
        )
        return f"'{result}'::uuid" if in_query else str(result)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return the UUID whose 128-bit integer value is `num`.

        The result depends only on `num`, so the same input always gives the
        same UUID. Values outside 0..2**128-1 (including negatives) wrap
        around modulo 2**128.
        """
        # uuid.uuid1 mixes in the current time, so it cannot be used for a
        # value that has to be reproducible from `num` alone.
        result = uuid.UUID(int=num % (1 << 128))
        return f"'{result}'::uuid" if in_query else str(result)

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "uuid" regardless of `backend`."""
        return "uuid"


class Jsonb(DataType):
    """JSON object column type named "jsonb" in Postgres."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "record" for Avro, "object" for JSON and "jsonb" otherwise."""
        if backend == Backend.AVRO:
            return "record"
        if backend == Backend.JSON:
            return "object"
        return "jsonb"

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a JSON object with keys key0..keyN-1 and random values.

        N is 1, 5, 10 or 20 depending on `record_size`; every value is a random
        integer from -100..100 rendered as a string. With `in_query` the JSON
        text is quoted and cast to jsonb. Raises ValueError for an unexpected
        `record_size`.
        """
        key_counts = (
            (RecordSize.TINY, 1),
            (RecordSize.SMALL, 5),
            (RecordSize.MEDIUM, 10),
            (RecordSize.LARGE, 20),
        )
        key_count = next(
            (count for size, count in key_counts if record_size == size), None
        )
        if key_count is None:
            raise ValueError(f"Unexpected record size {record_size}")

        payload = json.dumps(
            {f"key{idx}": str(rng.randint(-100, 100)) for idx in range(key_count)}
        )
        if in_query:
            return f"'{payload}'::jsonb"
        return payload

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return the JSON object {"key<num>": "<num>"}, cast to jsonb when `in_query` is set."""
        payload = json.dumps({f"key{num}": str(num)})
        if in_query:
            return f"'{payload}'::jsonb"
        return payload


class TextTextMap(DataType):
    """Map column type with text keys and text values (`map[text=>text]`)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "record" for Avro, "object" for JSON and "map[text=>text]" otherwise."""
        if backend == Backend.AVRO:
            return "record"
        elif backend == Backend.JSON:
            return "object"
        else:
            return "map[text=>text]"

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a map literal with keys key0..keyN-1 and random values.

        N is 1, 5, 10 or 20 depending on `record_size`; every value is a random
        integer from -100..100. With `in_query` the literal is quoted and cast
        to map[text=>text]. Raises ValueError for an unexpected `record_size`.
        """
        if record_size == RecordSize.TINY:
            key_range = 1
        elif record_size == RecordSize.SMALL:
            key_range = 5
        elif record_size == RecordSize.MEDIUM:
            key_range = 10
        elif record_size == RecordSize.LARGE:
            key_range = 20
        else:
            raise ValueError(f"Unexpected record size {record_size}")
        values = [
            f"{Text.numeric_value(i)} => {str(rng.randint(-100, 100))}"
            for i in range(0, key_range)
        ]
        values_str = f"{{{', '.join(values)}}}"
        return f"'{values_str}'::map[text=>text]" if in_query else values_str

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return a map literal with `num` entries key0..key<num-1>, each mapped to "key<num>".

        Every entry gets its own key, so the literal never repeats a key.
        `num` <= 0 yields the empty map `{}`. With `in_query` the literal is
        quoted and cast to map[text=>text].
        """
        values = [
            # The key follows the loop index; the value stays tied to `num`
            f"{Text.numeric_value(i)} => {Text.numeric_value(num)}"
            for i in range(0, num)
        ]
        values_str = f"{{{', '.join(values)}}}"
        return f"'{values_str}'::map[text=>text]" if in_query else values_str


# Sorted by repr so that runs with a specific seed stay reproducible
DATA_TYPES = sorted(all_subclasses(DataType), key=repr)

# fastavro._schema_common.UnknownType: record
# bytea requires Python bytes type instead of str
_NOT_FOR_AVRO = {TextTextMap, Jsonb, Bytea, Boolean, UUID}
DATA_TYPES_FOR_AVRO = sorted(set(DATA_TYPES) - _NOT_FOR_AVRO, key=repr)

# MySQL doesn't support keys of unlimited size
_NOT_FOR_KEY = {Text, Bytea}
DATA_TYPES_FOR_KEY = sorted(set(DATA_TYPES_FOR_AVRO) - _NOT_FOR_KEY, key=repr)

NUMBER_TYPES = [SmallInt, Int, Long, Float, Double]

Classes

class Backend (*args, **kwds)

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.

Expand source code Browse git
class Backend(Enum):
    """Target system whose spelling of a type name the `name` methods in this module return."""

    AVRO = 1
    JSON = 2
    POSTGRES = 3

Ancestors

  • enum.Enum

Class variables

var AVRO
var JSON
var POSTGRES
class Boolean
Expand source code Browse git
class Boolean(DataType):
    """Boolean column type; `numeric_value` is not overridden here."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return True or False picked by `rng`; `record_size` and `in_query` are not used."""
        return rng.choice((True, False))

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "boolean" regardless of `backend`."""
        return "boolean"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "boolean" regardless of `backend`."""
    return "boolean"

Inherited members

class Bytea
Expand source code Browse git
class Bytea(Text):
    """Text variant that only changes the type name ("bytea" in Postgres)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "bytes" for Avro, "string" for JSON and "bytea" otherwise."""
        if backend == Backend.AVRO:
            return "bytes"
        if backend == Backend.JSON:
            return "string"
        return "bytea"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "bytes" for Avro, "string" for JSON and "bytea" otherwise."""
    if backend == Backend.AVRO:
        return "bytes"
    if backend == Backend.JSON:
        return "string"
    return "bytea"

Inherited members

class DataType
Expand source code Browse git
class DataType:
    """As supported by Avro: https://avro.apache.org/docs/1.11.1/specification/_print/

    Base class of the generated column types. All methods are static and raise
    NotImplementedError here; subclasses override the ones they support.
    """

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Generate a random value, should be possible for all types.

        `rng` is the only source of randomness, `record_size` selects how large
        the value may get, and `in_query` asks for a form that can be embedded
        in a SQL query instead of the plain value.
        """
        raise NotImplementedError

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Generate a value that corresponds to `num`, so that it will always be the same value for the same input `num`, but fits into the type. This doesn't make sense for a type like boolean."""
        raise NotImplementedError

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return the name of this type as spelled for `backend`."""
        raise NotImplementedError

Subclasses

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return the name of this type as spelled for `backend`."""
    raise NotImplementedError
def numeric_value(num: int, in_query: bool = False) ‑> Any

Generate a value that corresponds to num, so that it will always be the same value for the same input num, but fits into the type. This doesn't make sense for a type like boolean.

Expand source code Browse git
@staticmethod
def numeric_value(num: int, in_query: bool = False) -> Any:
    """Generate a value that corresponds to `num`, so that it will always be the same value for the same input `num`, but fits into the type. This doesn't make sense for a type like boolean.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError
def random_value(rng: random.Random, record_size: RecordSize = RecordSize.LARGE, in_query: bool = False) ‑> Any

Generate a random value, should be possible for all types.

Expand source code Browse git
@staticmethod
def random_value(
    rng: random.Random,
    record_size: RecordSize = RecordSize.LARGE,
    in_query: bool = False,
) -> Any:
    """Generate a random value, should be possible for all types.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError
class Double
Expand source code Browse git
class Double(Float):
    """Float variant that only changes the type name ("float8" in Postgres)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "double" for Avro, "number" for JSON and "float8" otherwise."""
        if backend == Backend.AVRO:
            return "double"
        if backend == Backend.JSON:
            return "number"
        return "float8"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "double" for Avro, "number" for JSON and "float8" otherwise."""
    if backend == Backend.AVRO:
        return "double"
    if backend == Backend.JSON:
        return "number"
    return "float8"

Inherited members

class Float
Expand source code Browse git
class Float(DataType):
    """Floating point column type named "float4" in Postgres."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random float whose magnitude is bounded by `record_size`.

        1.0 and 0.0 each get their own 1-in-10 draw first. Otherwise TINY
        yields `rng.random()` and the larger sizes draw uniformly from a range
        that is symmetric around zero. Raises ValueError for an unexpected
        `record_size`. `in_query` is not used.
        """
        if rng.randrange(10) == 0:
            return 1.0
        if rng.randrange(10) == 0:
            return 0.0

        if record_size == RecordSize.TINY:
            return rng.random()
        elif record_size == RecordSize.SMALL:
            return rng.uniform(-100, 100)
        elif record_size == RecordSize.MEDIUM:
            return rng.uniform(-1_000_000, 1_000_000)
        elif record_size == RecordSize.LARGE:
            # Same magnitude on both sides, like the smaller sizes
            return rng.uniform(-1_000_000_000, 1_000_000_000)
        else:
            raise ValueError(f"Unexpected record size {record_size}")

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "float" for Avro, "number" for JSON and "float4" otherwise."""
        if backend == Backend.AVRO:
            return "float"
        elif backend == Backend.JSON:
            return "number"
        else:
            return "float4"

Ancestors

Subclasses

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "float" for Avro, "number" for JSON and "float4" otherwise."""
    if backend == Backend.AVRO:
        return "float"
    if backend == Backend.JSON:
        return "number"
    return "float4"

Inherited members

class Int
Expand source code Browse git
class Int(DataType):
    """Integer column type named "int" in Postgres and Avro."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        if record_size == RecordSize.TINY:
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            lower, upper = -127, 128
        elif record_size == RecordSize.SMALL:
            lower, upper = -32768, 32767
        elif record_size in (RecordSize.MEDIUM, RecordSize.LARGE):
            lower, upper = -2147483648, 2147483647
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "integer" for the JSON backend and "int" for every other backend."""
        return "integer" if backend == Backend.JSON else "int"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "integer" for the JSON backend and "int" for every other backend."""
    return "integer" if backend == Backend.JSON else "int"

Inherited members

class Jsonb
Expand source code Browse git
class Jsonb(DataType):
    """JSON object column type named "jsonb" in Postgres."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "record" for Avro, "object" for JSON and "jsonb" otherwise."""
        if backend == Backend.AVRO:
            return "record"
        if backend == Backend.JSON:
            return "object"
        return "jsonb"

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a JSON object with keys key0..keyN-1 and random values.

        N is 1, 5, 10 or 20 depending on `record_size`; every value is a random
        integer from -100..100 rendered as a string. With `in_query` the JSON
        text is quoted and cast to jsonb. Raises ValueError for an unexpected
        `record_size`.
        """
        key_counts = (
            (RecordSize.TINY, 1),
            (RecordSize.SMALL, 5),
            (RecordSize.MEDIUM, 10),
            (RecordSize.LARGE, 20),
        )
        key_count = next(
            (count for size, count in key_counts if record_size == size), None
        )
        if key_count is None:
            raise ValueError(f"Unexpected record size {record_size}")

        payload = json.dumps(
            {f"key{idx}": str(rng.randint(-100, 100)) for idx in range(key_count)}
        )
        if in_query:
            return f"'{payload}'::jsonb"
        return payload

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return the JSON object {"key<num>": "<num>"}, cast to jsonb when `in_query` is set."""
        payload = json.dumps({f"key{num}": str(num)})
        if in_query:
            return f"'{payload}'::jsonb"
        return payload

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "record" for Avro, "object" for JSON and "jsonb" otherwise."""
    if backend == Backend.AVRO:
        return "record"
    if backend == Backend.JSON:
        return "object"
    return "jsonb"

Inherited members

class Long
Expand source code Browse git
class Long(DataType):
    """Integer column type named "bigint" in Postgres and "long" in Avro."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        ranges = (
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            (RecordSize.TINY, (-127, 128)),
            (RecordSize.SMALL, (-32768, 32767)),
            (RecordSize.MEDIUM, (-2147483648, 2147483647)),
            (RecordSize.LARGE, (-9223372036854775808, 9223372036854775807)),
        )
        bounds = next(
            (limits for size, limits in ranges if record_size == size), None
        )
        if bounds is None:
            raise ValueError(f"Unexpected record size {record_size}")
        lower, upper = bounds

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "long" for Avro, "integer" for JSON and "bigint" otherwise."""
        if backend == Backend.AVRO:
            return "long"
        if backend == Backend.JSON:
            return "integer"
        return "bigint"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "long" for Avro, "integer" for JSON and "bigint" otherwise."""
    if backend == Backend.AVRO:
        return "long"
    if backend == Backend.JSON:
        return "integer"
    return "bigint"

Inherited members

class RecordSize (*args, **kwds)

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.

Expand source code Browse git
class RecordSize(Enum):
    """Size class for generated values; the data types in this module pick wider value ranges and longer values from TINY up to LARGE."""

    TINY = 1
    SMALL = 2
    MEDIUM = 3
    LARGE = 4

Ancestors

  • enum.Enum

Class variables

var LARGE
var MEDIUM
var SMALL
var TINY
class SmallInt
Expand source code Browse git
class SmallInt(DataType):
    """Integer column type named "smallint" in Postgres."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random integer from a range selected by `record_size`.

        Each bound of the range gets its own 1-in-10 draw before the uniform
        pick, so the edge values show up frequently. Raises ValueError for an
        unexpected `record_size`. `in_query` is not used.
        """
        if record_size == RecordSize.TINY:
            # NOTE(review): an 8-bit range would be -128..127; confirm these bounds are intended
            lower, upper = -127, 128
        elif record_size in (RecordSize.SMALL, RecordSize.MEDIUM, RecordSize.LARGE):
            lower, upper = -32768, 32767
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        # Lower bound is tried first, then the upper bound, one draw each
        for edge in (lower, upper):
            if rng.randrange(10) == 0:
                return edge
        return rng.randint(lower, upper)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return `num` unchanged."""
        return num

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return the type name for `backend`; only Postgres gets "smallint"."""
        if backend == Backend.AVRO:
            return "int"  # no explicit support in AVRO
        if backend == Backend.JSON:
            return "integer"  # no explicit support in JSON
        return "smallint"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return the type name for `backend`; only Postgres gets "smallint"."""
    if backend == Backend.AVRO:
        return "int"  # no explicit support in AVRO
    if backend == Backend.JSON:
        return "integer"  # no explicit support in JSON
    return "smallint"

Inherited members

class Text
Expand source code Browse git
class Text(DataType):
    """Text column type named "text" in Postgres and "string" elsewhere."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a random string, occasionally one of a few special values.

        With a 1-in-10 draw a special value (keyword-like text, a number,
        non-ASCII characters) is returned. Otherwise TINY picks one of three
        fixed words and the larger sizes build an alphanumeric string of 3, 10
        or 100 characters. With `in_query` the result is passed through
        pg8000's `literal`. Raises ValueError for an unexpected `record_size`.
        """
        # Resolve the size before any draw so an unexpected value always raises
        if record_size == RecordSize.TINY:
            length = None  # TINY picks from a fixed word list instead
        elif record_size == RecordSize.SMALL:
            length = 3
        elif record_size == RecordSize.MEDIUM:
            length = 10
        elif record_size == RecordSize.LARGE:
            length = 100
        else:
            raise ValueError(f"Unexpected record size {record_size}")

        result: Any
        if rng.randrange(10) == 0:
            # The special value is the final result; it must not be replaced
            # by a size-based string afterwards.
            result = rng.choice(
                [
                    "NULL",
                    "0.0",
                    "True",
                    # "",
                    "表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀",
                    rng.randint(-100, 100),
                ]
            )
        elif length is None:
            result = rng.choice(("foo", "bar", "baz"))
        else:
            # Fails: unterminated dollar-quoted string
            # chars = string.printable
            chars = string.ascii_letters + string.digits
            result = "".join(rng.choice(chars) for _ in range(length))

        return literal(str(result)) if in_query else str(result)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return "key<num>", wrapped in single quotes when `in_query` is set."""
        result = f"key{num}"
        return f"'{result}'" if in_query else str(result)

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "text" for Postgres and "string" for every other backend."""
        if backend == Backend.POSTGRES:
            return "text"
        else:
            return "string"

Ancestors

Subclasses

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "text" for Postgres and "string" for every other backend."""
    return "text" if backend == Backend.POSTGRES else "string"

Inherited members

class TextTextMap
Expand source code Browse git
class TextTextMap(DataType):
    """Map column type with text keys and text values (`map[text=>text]`)."""

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "record" for Avro, "object" for JSON and "map[text=>text]" otherwise."""
        if backend == Backend.AVRO:
            return "record"
        elif backend == Backend.JSON:
            return "object"
        else:
            return "map[text=>text]"

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return a map literal with keys key0..keyN-1 and random values.

        N is 1, 5, 10 or 20 depending on `record_size`; every value is a random
        integer from -100..100. With `in_query` the literal is quoted and cast
        to map[text=>text]. Raises ValueError for an unexpected `record_size`.
        """
        if record_size == RecordSize.TINY:
            key_range = 1
        elif record_size == RecordSize.SMALL:
            key_range = 5
        elif record_size == RecordSize.MEDIUM:
            key_range = 10
        elif record_size == RecordSize.LARGE:
            key_range = 20
        else:
            raise ValueError(f"Unexpected record size {record_size}")
        values = [
            f"{Text.numeric_value(i)} => {str(rng.randint(-100, 100))}"
            for i in range(0, key_range)
        ]
        values_str = f"{{{', '.join(values)}}}"
        return f"'{values_str}'::map[text=>text]" if in_query else values_str

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return a map literal with `num` entries key0..key<num-1>, each mapped to "key<num>".

        Every entry gets its own key, so the literal never repeats a key.
        `num` <= 0 yields the empty map `{}`. With `in_query` the literal is
        quoted and cast to map[text=>text].
        """
        values = [
            # The key follows the loop index; the value stays tied to `num`
            f"{Text.numeric_value(i)} => {Text.numeric_value(num)}"
            for i in range(0, num)
        ]
        values_str = f"{{{', '.join(values)}}}"
        return f"'{values_str}'::map[text=>text]" if in_query else values_str

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "record" for Avro, "object" for JSON and "map[text=>text]" otherwise."""
    if backend == Backend.AVRO:
        return "record"
    if backend == Backend.JSON:
        return "object"
    return "map[text=>text]"

Inherited members

class UUID
Expand source code Browse git
class UUID(DataType):
    """UUID column type; named "uuid" for every backend."""

    @staticmethod
    def random_value(
        rng: random.Random,
        record_size: RecordSize = RecordSize.LARGE,
        in_query: bool = False,
    ) -> Any:
        """Return one of four fixed UUIDs or a random version-4 UUID.

        The random candidate is built from 128 bits of `rng` on every call.
        With `in_query` the value is quoted and cast to uuid. `record_size`
        is not used.
        """
        result = rng.choice(
            [
                "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
                "6f5eec33-a3c9-40b2-ae06-58f53aca6e7d",
                "00000000-0000-0000-0000-000000000000",
                "ffffffff-ffff-ffff-ffff-ffffffffffff",
                uuid.UUID(int=rng.getrandbits(128), version=4),
            ]
        )
        return f"'{result}'::uuid" if in_query else str(result)

    @staticmethod
    def numeric_value(num: int, in_query: bool = False) -> Any:
        """Return the UUID whose 128-bit integer value is `num`.

        The result depends only on `num`, so the same input always gives the
        same UUID. Values outside 0..2**128-1 (including negatives) wrap
        around modulo 2**128.
        """
        # uuid.uuid1 mixes in the current time, so it cannot be used for a
        # value that has to be reproducible from `num` alone.
        result = uuid.UUID(int=num % (1 << 128))
        return f"'{result}'::uuid" if in_query else str(result)

    @staticmethod
    def name(backend: Backend = Backend.POSTGRES) -> str:
        """Return "uuid" regardless of `backend`."""
        return "uuid"

Ancestors

Static methods

def name(backend: Backend = Backend.POSTGRES) ‑> str
Expand source code Browse git
@staticmethod
def name(backend: Backend = Backend.POSTGRES) -> str:
    """Return "uuid" regardless of `backend`."""
    return "uuid"

Inherited members