ISCC - Codec#

This module implements encoding, decoding and transcoding functions of ISCC

Codec Overview#

ISCC - data structure

Codec Functions#

`encode_component(mtype, stype, version, bit_length, digest)` #

Encode an ISCC-UNIT including header and body with standard base32 encoding.

Note

The length value must be the length in number of bits for the component. If digest has more bits than specified by length it will be truncated.

Parameters:

Name	Type	Description	Default
`mtype`	`MainType`	Maintype of unit (0-6)	required
`stype`	`SubType`	SubType of unit depending on MainType (0-5)	required
`version`	`Version`	Version of unit algorithm (0).	required
`bit_length`	`length`	Length of unit, in number of bits (multiple of 32)	required
`digest`	`bytes`	The hash digest of the unit.	required

Returns:

Type	Description
`str`	Base32 encoded ISCC-UNIT.

Source code in iscc_core\codec.py

def encode_component(mtype, stype, version, bit_length, digest):
    # type: (MainType, SubType, Version, Length, bytes) -> str
    """
    Build the base32 string of a single ISCC-UNIT from its header fields and digest.

    !!! note
        `bit_length` is the body size in **number of bits**. Only the first
        `bit_length // 8` bytes of `digest` are used; surplus bytes are dropped.

    :param MainType mtype: MainType of the unit (`MT.ISCC` is rejected).
    :param SubType stype: SubType of the unit depending on MainType.
    :param Version version: Version of the unit algorithm.
    :param Length bit_length: Length of the unit body in number of bits.
    :param bytes digest: The hash digest of the unit.
    :return: Base32 encoded ISCC-UNIT.
    :rtype: str
    :raises ValueError: If `mtype` is `MT.ISCC` or not one of the unit MainTypes.
    """
    unit_types = (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.ID, MT.FLAKE)
    if mtype not in unit_types:
        if mtype == MT.ISCC:
            raise ValueError(f"{mtype} is not a unit")
        raise ValueError(f"Illegal MainType {mtype}")

    # The header stores a MainType-specific length value, not the raw bit count.
    header = encode_header(mtype, stype, version, encode_length(mtype, bit_length))
    body = digest[: bit_length // 8]
    return encode_base32(header + body)

`encode_header(mtype, stype, version = 0, length = 1)` #

Encodes header values with nibble-sized (4-bit) variable-length encoding. The result is minimum 2 and maximum 8 bytes long. If the final count of nibbles is uneven it is padded with 4-bit 0000 at the end.

Warning

The length value must be encoded beforehand because its semantics depend on the MainType (see encode_length function).

Parameters:

Name	Type	Description	Default
`mtype`	`MainType`	MainType of unit.	required
`stype`	`SubType`	SubType of unit.	required
`version`	`Version`	Version of component algorithm.	`0`
`length`	`Length`	length value of unit (1 means 64-bits for standard units)	`1`

Returns:

Type	Description
`bytes`	Varnibble stream encoded ISCC header as bytes.

Source code in iscc_core\codec.py

def encode_header(mtype, stype, version=0, length=1):
    # type: (MainType, SubType, Version, Length) -> bytes
    """
    Serialize the four header fields as a stream of varnibbles and return it as bytes.

    Each field is written with `encode_varnibble` in the order MainType, SubType,
    Version, Length. If the resulting bit count is not a multiple of 8 the stream is
    zero-padded at the end.

    !!! warning
        `length` must already be the encoded header value (see `encode_length`),
        because its meaning depends on the MainType.

    :param MainType mtype: MainType of unit.
    :param SubType stype: SubType of unit.
    :param Version version: Version of component algorithm.
    :param Length length: Encoded length value of unit.
    :return: Varnibble stream encoded ISCC header as bytes.
    :rtype: bytes
    """
    bits = bitarray()
    for field in (mtype, stype, version, length):
        bits.extend(encode_varnibble(field))
    # Pad with zero bits on the right up to the next byte boundary.
    bits.fill()
    return bits.tobytes()

`decode_header(data)` #

Decodes varnibble encoded header and returns it together with tail data.

Tail data is included to enable decoding of sequential ISCCs. The returned tail data must be truncated to decode_length(r[0], r[3]) bits to recover the actual hash-bytes.

Parameters:

Name	Type	Description	Default
`data`	`bytes`	ISCC bytes	required

Returns:

Type	Description
`IsccTuple`	(MainType, SubType, Version, length, TailData)

Source code in iscc_core\codec.py

def decode_header(data):
    # type: (bytes) -> IsccTuple
    """
    Read the four varnibble header fields and return them with the remaining bytes.

    The remaining bytes (`tail data`) are returned untruncated so that sequences of
    ISCCs can be decoded one after another. To get the actual hash bytes, the caller
    must truncate the tail to `decode_length(r[0], r[3])` bits.

    :param bytes data: ISCC bytes
    :return: (MainType, SubType, Version, length, TailData)
    :rtype: IsccTuple
    """
    bits = bitarray()
    bits.frombytes(data)

    fields = []
    for _ in range(4):
        value, bits = decode_varnibble(bits)
        fields.append(value)

    # An odd number of header nibbles leaves a 4-bit zero padding before the body.
    if len(bits) % 8 != 0 and bits[:4] == bitarray("0000"):
        bits = bits[4:]

    return (*fields, bits.tobytes())

`encode_varnibble(n)` #

Writes integer to variable length sequence of 4-bit chunks.

Variable-length encoding scheme:

prefix bits	nibbles	data bits	unsigned range
0	1	3	0 - 7
10	2	6	8 - 71
110	3	9	72 - 583
1110	4	12	584 - 4679

Parameters:

Name	Type	Description	Default
`n`	`int`	Positive integer to be encoded as varnibble (0-4679)	required

Returns:

Type	Description
`bitarray`	Varnibble encoded integer

Source code in iscc_core\codec.py

def encode_varnibble(n):
    # type: (int) -> bitarray
    """
    Encode an integer as a variable-length sequence of 4-bit chunks.

    Variable-length encoding scheme:

    ------------------------------------------------------
    | prefix bits | nibbles | data bits | unsigned range |
    | ----------- | ------- | --------- | -------------- |
    | 0           | 1       | 3         | 0 - 7          |
    | 10          | 2       | 6         | 8 - 71         |
    | 110         | 3       | 9         | 72 - 583       |
    | 1110        | 4       | 12        | 584 - 4679     |

    :param int n: Integer to be encoded as varnibble (0-4679)
    :return: Varnibble encoded integer
    :rtype: bitarray
    :raises ValueError: If `n` is outside the range 0-4679.
    """
    if not 0 <= n < 4680:
        raise ValueError("Value must be between 0 and 4679")

    # Thresholds are checked in ascending order; each range stores n minus its start.
    if n < 8:
        return int2ba(n, length=4)
    if n < 72:
        return bitarray("10") + int2ba(n - 8, length=6)
    if n < 584:
        return bitarray("110") + int2ba(n - 72, length=9)
    return bitarray("1110") + int2ba(n - 584, length=12)

`decode_varnibble(b)` #

Reads first varnibble, returns its integer value and remaining bits.

Parameters:

Name	Type	Description	Default
`b`	`bitarray`	Array of header bits	required

Returns:

Type	Description
`Tuple[int, bitarray]`	A tuple of the integer value of the first varnibble and the remaining bits.

Raises:

Type	Description
`ValueError`	If input is invalid or too short

Source code in iscc_core\codec.py

def decode_varnibble(b):
    # type: (bitarray) -> Tuple[int, bitarray]
    """Decode the leading varnibble of `b` and return it with the unread bits.

    :param bitarray b: Array of header bits
    :return: A tuple of the integer value of the first varnibble and the remaining bits.
    :rtype: Tuple[int, bitarray]
    :raises ValueError: If input is invalid or too short
    """
    bits = len(b)
    if bits < 4:
        raise ValueError("Input too short - minimum 4 bits required")

    # (prefix pattern, total width in bits, value offset of the range)
    layouts = (
        ("0", 4, 0),  # 0xxx
        ("10", 8, 8),  # 10xxxxxx
        ("110", 12, 72),  # 110xxxxxxxxx
        ("1110", 16, 584),  # 1110xxxxxxxxxxxx
    )
    for pattern, width, offset in layouts:
        skip = len(pattern)
        if b[:skip] == bitarray(pattern) and bits >= width:
            return ba2int(b[skip:width]) + offset, b[width:]

    # Nothing matched: report short input for fewer than 16 bits, else the bad prefix.
    if bits >= 16:
        prefix = b[:4].to01()
        raise ValueError(f"Invalid prefix pattern '{prefix}' - must be one of: 0, 10, 110, 1110")
    raise ValueError(f"Input too short - got {bits} bits but need more based on prefix")

`encode_units(units)` #

Encodes a combination of ISCC units to an integer between 0-7 to be used as a length value for the final encoding of MT.ISCC

Parameters:

Name	Type	Description	Default
`units`	`Tuple`	A tuple of a MainType combination (can be empty)	required

Returns:

Type	Description
`int`	Integer value to be used as length-value for header encoding

Raises:

Type	Description
`ValueError`	If the combination of ISCC-UNITs is invalid

Source code in iscc_core\codec.py

def encode_units(units):
    # type: (Tuple[MT, ...]) -> int
    """
    Map a combination of ISCC units to its position in `UNITS`.

    The returned index is used as the length value in the header of an `MT.ISCC` code.

    :param Tuple units: A tuple of a MainType combination (can be empty)
    :return: Integer value to be used as length-value for header encoding
    :rtype: int
    :raises ValueError: If the combination of ISCC-UNITs is invalid
    """
    try:
        return UNITS.index(units)
    except ValueError:
        # Report the first entry that is not an MT member, if there is one.
        for candidate in units:
            if isinstance(candidate, MT):
                continue
            raise ValueError(f"Invalid ISCC-UNIT {candidate} - must be of type MT")

        # All entries are MT members, so the combination itself is unknown.
        if units:
            unit_names = [MT(u).name for u in units]
        else:
            unit_names = ["empty"]
        raise ValueError(f"Invalid ISCC-UNIT combination: {', '.join(unit_names)}")

`decode_units(unit_id)` #

Decodes an ISCC header length value that has been encoded with a unit_id to an ordered tuple of MainTypes.

Source code in iscc_core\codec.py

def decode_units(unit_id):
    # type: (int) -> Tuple[MT, ...]
    """
    Look up the unit combination stored at index `unit_id` of `UNITS` and return it
    as a sorted tuple of MainTypes.

    :param int unit_id: Header length value that was encoded as a unit id.
    :return: Ordered tuple of MainTypes.
    :rtype: Tuple[MT, ...]
    """
    return tuple(MT(unit) for unit in sorted(UNITS[unit_id]))

`encode_length(mtype, length)` #

Encode length to integer value for header encoding.

The length value has MainType-specific semantics:

For MainTypes META, SEMANTIC, CONTENT, DATA, INSTANCE:

Length means number of bits for the body.
Length is encoded as the multiple of 32-bit chunks (0 being 32bits)
Examples: 32 -> 0, 64 -> 1, 96 -> 2 ...

For MainType ISCC:

MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are
optional. Length means the composition of optional 64-bit units included
in the ISCC composite.

Examples:
    No optional units      -> 0000 -> 0
    CONTENT                -> 0001 -> 1
    SEMANTIC               -> 0010 -> 2
    SEMANTIC, CONTENT      -> 0011 -> 3
    META                   -> 0100 -> 4
    META, CONTENT          -> 0101 -> 5
    ...

For MainType ID:

Length means the number of bits for the body including the counter.
Length is encoded as number of bytes of the counter (64-bit body is implicit)
Examples:
    64 -> 0 (No counter)
    72 -> 1 (One byte counter)
    80 -> 2 (Two byte counter)
    ...

Parameters:

Name	Type	Description	Default
`mtype`	`MainType`	The MainType for which to encode the length value.	required
`length`	`Length`	The length expressed according to the semantics of the type	required

Returns:

Type	Description
`int`	The length value encoded as integer for use with encode_header.

Source code in iscc_core\codec.py

def encode_length(mtype, length):
    # type: (MainType, Length) -> int
    """
    Encode length to integer value for header encoding.

    The `length` value has MainType-specific semantics:

    For MainTypes `META`, `SEMANTIC`, `CONTENT`, `DATA`, `INSTANCE`, `FLAKE`:

        Length means number of bits for the body.
        Length is encoded as the multiple of 32-bit chunks (0 being 32bits)
        Examples: 32 -> 0, 64 -> 1, 96 -> 2 ...

    For MainType `ISCC`:

        MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are
        optional. Length means the composition of optional 64-bit units included
        in the ISCC composite. The value (0-7) is passed through unchanged.

        Examples:
            No optional units      -> 0000 -> 0
            CONTENT                -> 0001 -> 1
            SEMANTIC               -> 0010 -> 2
            SEMANTIC, CONTENT      -> 0011 -> 3
            META                   -> 0100 -> 4
            META, CONTENT          -> 0101 -> 5
            ...

    For MainType `ID`:

        Length means the number of bits for the body including the counter.
        Length is encoded as number of bytes of the counter (64-bit body is implicit)
        and must therefore be a whole number of bytes between 64 and 96 bits.
        Examples:
            64 -> 0 (No counter)
            72 -> 1 (One byte counter)
            80 -> 2 (Two byte counter)
            ...

    :param MainType mtype: The MainType for which to encode the length value.
    :param Length length: The length expressed according to the semantics of the type
    :return: The length value encoded as integer for use with encode_header.
    :rtype: int
    :raises ValueError: If `length` is not valid for `mtype` or `mtype` is unknown.
    """

    error = f"Invalid length {length} for MainType {mtype}"
    # standard case (length field denotes number of 32-bit chunks, 0 being 32-bits)
    if mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):
        if length >= 32 and not length % 32:
            return (length // 32) - 1
        raise ValueError(error)
    # flag type encoding of included components (pass through as encoded out-of-band)
    if mtype == MT.ISCC:
        if 0 <= length <= 7:
            return length
        raise ValueError(error)
    # counter byte length encoding
    if mtype == MT.ID:
        # Reject partial bytes: they would be floored here and decode to a
        # different bit length than the one requested.
        if 64 <= length <= 96 and not length % 8:
            return (length - 64) // 8
        raise ValueError(error)
    raise ValueError(error)

`decode_length(mtype, length, subtype = None)` #

Decode raw length value from ISCC header to length of digest in number of bits.

Decodes a raw header integer value into its semantically meaningful value (e.g. number of bits)

Source code in iscc_core\codec.py

def decode_length(mtype, length, subtype=None):
    # type: (MainType, Length, SubType|None) -> LN
    """
    Decode a raw length value from an ISCC header to the digest length in bits.

    :param MainType mtype: MainType the length value belongs to.
    :param Length length: Raw length value as read from the header.
    :param SubType|None subtype: SubType; only consulted for `MT.ISCC`.
    :return: Length of the digest in number of bits.
    :rtype: LN
    :raises ValueError: If `mtype` is not a known MainType.
    """
    if mtype == MT.ISCC:
        if subtype == ST_ISCC.WIDE:
            return LN(256)  # 128-bit Data + 128-bit Instance
        # 64 bits per optional unit plus 128 bits for Data and Instance.
        optional_units = decode_units(length)
        return LN(len(optional_units) * 64 + 128)

    if mtype == MT.ID:
        # Header value counts the counter bytes after the implicit 64-bit body.
        return LN(length * 8 + 64)

    if mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):
        # Header value counts 32-bit chunks, with 0 meaning 32 bits.
        return LN((length + 1) * 32)

    raise ValueError(f"Invalid length {length} for MainType {mtype}")

`encode_base32(data)` #

Standard RFC4648 base32 encoding without padding.

Source code in iscc_core\codec.py

def encode_base32(data):
    # type: (bytes) -> str
    """
    Encode bytes as standard RFC4648 base32 text with the `=` padding removed.

    :param bytes data: Raw bytes to encode.
    :return: Base32 string without padding.
    :rtype: str
    """
    padded = b32encode(data)
    return padded.rstrip(b"=").decode("ascii")

`decode_base32(code)` #

Standard RFC4648 base32 decoding without padding and with casefolding.

Source code in iscc_core\codec.py

def decode_base32(code):
    # type: (str) -> bytes
    """
    Decode unpadded standard RFC4648 base32 text, accepting lower-case input.

    :param str code: Base32 string without padding.
    :return: Decoded bytes.
    :rtype: bytes
    """
    # b32decode needs input padded to a multiple of 8 characters.
    missing = -len(code) % 8
    padded = code + "=" * missing
    return bytes(b32decode(padded, casefold=True))

`iscc_decompose(iscc_code)` #

Decompose a normalized ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.

A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally separated by a hyphen.

Source code in iscc_core\codec.py

def iscc_decompose(iscc_code):
    # type: (str) -> List[str]
    """
    Split a normalized ISCC-CODE or any valid ISCC sequence into its ISCC-UNITs.

    A valid ISCC sequence is a string concatenation of ISCC-UNITs, optionally
    separated by a hyphen. Multiformat encoded input is converted to base32 first.

    :param str iscc_code: ISCC-CODE or sequence of ISCC-UNITs.
    :return: Base32 encoded ISCC-UNITs in the order they were decoded.
    :rtype: List[str]
    """
    remaining = decode_base32(normalize_multiformat(iscc_code))
    units = []

    while remaining:
        mt, st, vs, ln, body = decode_header(remaining)

        if mt == MT.ISCC:
            # ISCC-CODE: the header length value identifies the optional units.
            optional_types = decode_units(ln)

            if st == ST_ISCC.WIDE:
                # WIDE subtype: 128-bit Data followed by 128-bit Instance.
                wide_data = encode_component(MT.DATA, ST.NONE, vs, 128, body[:16])
                wide_instance = encode_component(MT.INSTANCE, ST.NONE, vs, 128, body[16:32])
                units += [wide_data, wide_instance]
                break

            # Dynamic units (META, SEMANTIC, CONTENT) take 8 bytes each from the front;
            # encode_component truncates the slice to 64 bits.
            for position, unit_type in enumerate(optional_types):
                unit_stype = st if unit_type != MT.META else ST.NONE
                unit = encode_component(unit_type, unit_stype, vs, 64, body[position * 8 :])
                units.append(unit)

            # Static units (DATA, INSTANCE) are the last two 8-byte chunks.
            data_unit = encode_component(MT.DATA, ST.NONE, vs, 64, body[-16:-8])
            instance_unit = encode_component(MT.INSTANCE, ST.NONE, vs, 64, body[-8:])
            units += [data_unit, instance_unit]
            break

        # Standalone ISCC-UNIT: bytes after its body are decoded in the next pass.
        ln_bits = decode_length(mt, ln)
        nbytes = ln_bits // 8
        units.append(encode_component(mt, st, vs, ln_bits, body[:nbytes]))
        remaining = body[nbytes:]

    return units

`iscc_normalize(iscc_code)` #

Normalize an ISCC to its canonical form.

The canonical form of an ISCC is its shortest base32 encoded representation prefixed with the string ISCC:.

Possible valid inputs:

MEACB7X7777574L6
ISCC:MEACB7X7777574L6
fcc010001657fe7cafe9791bb
iscc:maagztfqttvizpjr
Iscc:Maagztfqttvizpjr

Info

A concatenated sequence of codes will be composed into a single ISCC of MainType MT.ISCC if possible.

Example

>>> import iscc_core
>>> iscc_core.iscc_normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'

Parameters:

Name	Type	Description	Default
`iscc_code`	`str`	Any valid ISCC string	required

Returns:

Type	Description
`str`	Normalized ISCC

Source code in iscc_core\codec.py

def iscc_normalize(iscc_code):
    # type: (str) -> str
    """
    Bring an ISCC into its canonical form.

    The canonical form of an ISCC is its shortest base32 encoded representation
    prefixed with the string `ISCC:`.

    Possible valid inputs:

        MEACB7X7777574L6
        ISCC:MEACB7X7777574L6
        fcc010001657fe7cafe9791bb
        iscc:maagztfqttvizpjr
        Iscc:Maagztfqttvizpjr


    !!! info
        A concatenated sequence of codes will be composed into a single ISCC of MainType
        `MT.ISCC` if possible.

    !!! example
        ``` py
        >>> import iscc_core
        >>> iscc_core.iscc_normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
        'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'

        ```

    :param str iscc_code: Any valid ISCC string
    :return: Normalized ISCC
    :rtype: str
    :raises ValueError: If the code does not start with a known prefix.
    """
    from iscc_core.iscc_code import gen_iscc_code_v0

    # Convert multiformat input to plain base32 before inspecting it.
    iscc_code = normalize_multiformat(iscc_code)

    prefix = iscc_code.upper()[:2]
    if prefix not in PREFIXES:
        raise ValueError(f"ISCC starts with invalid prefix {prefix}")

    # Remember whether the input is a WIDE ISCC-CODE so recomposition keeps that form.
    header = decode_header(decode_base32(iscc_clean(iscc_code)))
    is_wide = header[0] == MT.ISCC and header[1] == ST_ISCC.WIDE

    decomposed = iscc_decompose(iscc_code)
    if len(decomposed) >= 2:
        recomposed = gen_iscc_code_v0(decomposed, wide=is_wide)["iscc"]
    else:
        recomposed = decomposed[0]

    if recomposed.startswith("ISCC:"):
        return recomposed
    return f"ISCC:{recomposed}"

Alternate Encodings#

`encode_base64(data)` #

Standard RFC4648 base64url encoding without padding.

Source code in iscc_core\codec.py

def encode_base64(data):
    # type: (bytes) -> str
    """
    Encode bytes as RFC4648 base64url text with the `=` padding removed.

    :param bytes data: Raw bytes to encode.
    :return: Base64url string without padding.
    :rtype: str
    """
    return urlsafe_b64encode(data).rstrip(b"=").decode("ascii")

`decode_base64(code)` #

Standard RFC4648 base64url decoding without padding.

Source code in iscc_core\codec.py

def decode_base64(code):
    # type: (str) -> bytes
    """
    Standard RFC4648 base64url decoding without padding.

    :param str code: Base64url string without padding.
    :return: Decoded bytes.
    :rtype: bytes
    """
    # Re-pad to a multiple of 4 characters; aligned input needs no padding at all.
    padding = -len(code) % 4
    return urlsafe_b64decode(code + "=" * padding)

`encode_base32hex(data)` #

RFC4648 Base32hex encoding without padding

see: https://tools.ietf.org/html/rfc4648#page-10

Source code in iscc_core\codec.py

def encode_base32hex(data):
    # type: (bytes) ->  str
    """
    Encode bytes as RFC4648 base32hex text without padding.

    The data is base32 encoded first and then mapped with the `b32_to_hex`
    translation table.

    see: https://tools.ietf.org/html/rfc4648#page-10
    """
    return encode_base32(data).translate(b32_to_hex)

`decode_base32hex(code)` #

RFC4648 Base32hex decoding without padding

see: https://tools.ietf.org/html/rfc4648#page-10

Source code in iscc_core\codec.py

def decode_base32hex(code):
    # type: (str) -> bytes
    """
    Decode unpadded RFC4648 base32hex text to bytes.

    see: https://tools.ietf.org/html/rfc4648#page-10
    """
    # The code is upper-cased before it is mapped with the `hex_to_b32` table.
    upper = code.upper()
    return decode_base32(upper.translate(hex_to_b32))

`normalize_multiformat(iscc_code)` #

Normalize a multiformat encoded ISCC to standard base32 encoding. Returns the input unchanged (but cleaned) if it's not multiformat encoded.

Source code in iscc_core\codec.py

def normalize_multiformat(iscc_code):
    # type: (str) -> str
    """
    Normalize a multiformat encoded ISCC to standard base32 encoding.
    Returns the input unchanged (but cleaned) if it's not multiformat encoded.

    :param str iscc_code: ISCC string, optionally multibase/multicodec encoded.
    :return: Base32 encoded ISCC without multiformat prefix, or the cleaned input.
    :rtype: str
    :raises ValueError: If the decoded multiformat data lacks the ISCC multicodec prefix.
    """
    decoders = {
        MULTIBASE.base16.value: bytes.fromhex,  # f
        MULTIBASE.base32.value: decode_base32,  # b
        MULTIBASE.base32hex.value: decode_base32hex,  # v
        MULTIBASE.base58btc.value: base58.b58decode,  # z
        MULTIBASE.base64url.value: decode_base64,  # u
    }

    # Clean the ISCC code first
    iscc_code = iscc_clean(iscc_code)

    # Nothing left after cleaning (e.g. input "ISCC:"): there is no multibase
    # prefix to inspect, so hand back the empty cleaned code instead of failing
    # with an IndexError.
    if not iscc_code:
        return iscc_code

    # Check for multibase prefix
    decoder = decoders.get(iscc_code[0])
    if decoder is None:
        return iscc_code

    decoded = decoder(iscc_code[1:])
    if not decoded.startswith(MC_PREFIX):
        raise ValueError(f"Malformed multiformat codec: {decoded[:2]}")
    # Drop the first two bytes (where the multicodec prefix was matched) before re-encoding.
    return encode_base32(decoded[2:])

Helper Functions#

`iscc_decode(iscc)` #

Decode ISCC to an IsccTuple

Parameters:

Name	Type	Description	Default
`iscc`	`str`	ISCC string	required

Returns:

Type	Description
`IsccTuple`	ISCC decoded to a tuple

Source code in iscc_core\codec.py

def iscc_decode(iscc):
    # type: (str) -> IsccTuple
    """
    Normalize an ISCC string and decode its header into an IsccTuple.

    :param str iscc: ISCC string
    :return: ISCC decoded to a tuple
    :rtype: IsccTuple
    """
    normalized = iscc_normalize(iscc)
    raw = decode_base32(iscc_clean(normalized))
    return decode_header(raw)

`iscc_explain(iscc)` #

Convert ISCC to a human-readable representation

Parameters:

Name	Type	Description	Default
`iscc`	`str`	ISCC string	required

Returns:

Type	Description
`str`	Human-readable representation of ISCC

Source code in iscc_core\codec.py

def iscc_explain(iscc):
    # type: (str) -> str
    """
    Render an ISCC as a human-readable string.

    :param str iscc: ISCC string
    :return: Human-readable representation of ISCC
    :rtype: str
    """
    tid = iscc_type_id(iscc)
    fields = iscc_decode(iscc)
    digest = fields[-1]

    # Everything except ISCC-IDs is shown as type id plus hex digest.
    if fields[0] != MT.ID:
        return f"{tid}-{digest.hex()}"

    if fields[2] == VS.V1:
        # ISCC-IDv1 is formatted as ID-REALM_<id>-V1-64-<timestamp>-<serverid>
        realm_id = fields[1]
        packed = int.from_bytes(digest, byteorder="big")
        server_id = packed & 0xFFF  # lowest 12 bits
        timestamp = packed >> 12  # remaining high bits
        return f"ID-REALM_{realm_id}-V1-64-{timestamp}-{server_id}"

    # ISCC-IDv0: bytes after the first 8 hold an optional uvarint counter.
    counter_bytes = digest[8:]
    if not counter_bytes:
        return f"{tid}-{digest.hex()}"

    counter = uvarint.decode(counter_bytes)
    return f"{tid}-{digest[:8].hex()}-{counter.integer}"

`iscc_type_id(iscc)` #

Extract and convert ISCC HEADER to a readable Type-ID string.

Type-ids can be used as names in databases to index ISCC-UNITs separately.

Parameters:

Name	Type	Description	Default
`iscc`	`str`	ISCC string	required

Returns:

Type	Description
`str`	Unique Type-ID string

Source code in iscc_core\codec.py

def iscc_type_id(iscc):
    # type: (str) -> str
    """
    Build a readable Type-ID string from the header of an ISCC.

    Type-ids can be used as names in databases to index ISCC-UNITs separately.

    :param str iscc: ISCC string
    :return: Unique Type-ID string
    :rtype: str
    """
    raw_mtype, raw_stype, raw_version, raw_length = iscc_decode(iscc)[:4]

    mtype = MT(raw_mtype)
    # The SubType enum to use depends on the MainType and the version.
    stype = SUBTYPE_MAP[(raw_mtype, raw_version)](raw_stype)

    if mtype == MT.ISCC:
        # First letter of each optional unit, followed by "DI" for Data and Instance.
        initials = "".join(unit.name[0] for unit in decode_units(raw_length))
        length = initials + "DI"
    else:
        length = decode_length(raw_mtype, raw_length)

    version = VS(raw_version)

    return f"{mtype.name}-{stype.name}-{version.name}-{length}"

`iscc_validate(iscc, strict = True)` #

Validate that a given string is a strictly well-formed ISCC.

A strictly well-formed ISCC is:

an ISCC-CODE or ISCC-UNIT
encoded with base32 upper without padding
has a valid combination of header values
is represented in its canonical form

Parameters:

Name	Type	Description	Default
`iscc`	`str`	ISCC string	required
`strict`	`bool`	Raise an exception if validation fails (default True)	`True`

Returns:

Type	Description
`bool`	True if string is valid else false. (raises ValueError in strict mode)

Source code in iscc_core\codec.py

def iscc_validate(iscc, strict=True):
    # type: (str, bool) -> bool
    """
    Validate that a given string is a *strictly well-formed* ISCC.

    A *strictly well-formed* ISCC is:

    - an ISCC-CODE or ISCC-UNIT
    - encoded with base32 upper without padding
    - has a valid combination of header values
    - is represented in its canonical form

    :param str iscc: ISCC string
    :param bool strict: Raise an exception if validation fails (default True)
    :return: True if string is valid else false. (raises ValueError in strict mode)
    :rtype: bool
    :raises ValueError: In strict mode, if any of the validation steps fails.
    """

    # Basic regex validation
    if not CANONICAL_REGEX.match(iscc):
        if strict:
            raise ValueError("ISCC string does not match ^ISCC:[A-Z2-7]{10,68}$")
        return False

    # Base32 encoding test
    try:
        decode_base32(iscc.split(":")[1])
    except Exception as e:
        if strict:
            raise ValueError(e) from e
        return False

    cleaned = iscc_clean(iscc)

    # Prefix test
    prefix = cleaned[:2]
    if prefix not in PREFIXES:
        if strict:
            raise ValueError(f"Header starts with invalid sequence {prefix}")
        return False

    # Header test: a malformed varnibble must count as a validation failure,
    # not escape as an exception in non-strict mode.
    try:
        mtype, stype, version, length, tail = decode_header(decode_base32(cleaned))
    except (ValueError, IndexError) as e:
        if strict:
            raise ValueError(e) from e
        return False

    # Version test
    if version not in (0, 1):
        if strict:
            raise ValueError(f"Unknown version {version} in version header")
        return False

    # Length test: decoding the length can itself fail for header values that do
    # not map to a valid length or unit combination.
    try:
        expected_nbytes = decode_length(mtype, length, stype).value // 8
    except (ValueError, IndexError) as e:
        if strict:
            raise ValueError(e) from e
        return False

    actual_nbytes = len(tail)
    if expected_nbytes != actual_nbytes:
        if strict:
            raise ValueError(f"Header expects {expected_nbytes} but got {actual_nbytes} bytes")
        return False

    return True

`iscc_validate_mf(iscc, strict = True)` #

Validate that a given string is a well-formed ISCC in any supported encoding format.

Parameters:

Name	Type	Description	Default
`iscc`	`str`	ISCC string in any supported encoding	required
`strict`	`bool`	Raise an exception if validation fails (default True)	`True`

Returns:

Type	Description
`bool`	True if string is valid else false. (raises ValueError in strict mode)

Source code in iscc_core\codec.py

def iscc_validate_mf(iscc, strict=True):
    # type: (str, bool) -> bool
    """
    Check that a string is a well-formed ISCC in any supported encoding format.

    The input is converted to base32 with `normalize_multiformat`, prefixed with
    `ISCC:` and then passed to `iscc_validate`.

    :param str iscc: ISCC string in any supported encoding
    :param bool strict: Raise an exception if validation fails (default True)
    :return: True if string is valid else false. (raises ValueError in strict mode)
    :rtype: bool
    """
    try:
        candidate = f"ISCC:{normalize_multiformat(iscc)}"
        return iscc_validate(candidate, strict)
    except Exception:
        # Non-strict mode reports any failure, including decoding errors, as False.
        if not strict:
            return False
        raise

`iscc_clean(iscc)` #

Cleanup ISCC string.

Removes leading scheme, dashes, leading/trailing whitespace.

Parameters:

Name	Type	Description	Default
`iscc`	`str`	Any valid ISCC string	required

Returns:

Type	Description
`str`	Cleaned ISCC string.

Source code in iscc_core\codec.py

def iscc_clean(iscc):
    # type: (str) -> str
    """
    Cleanup ISCC string.

    Removes leading scheme, dashes, leading/trailing whitespace. Dashes are kept
    when a code without scheme starts with a multibase prefix character. Empty
    or whitespace-only input yields an empty string.

    :param str iscc: Any valid ISCC string
    :return: Cleaned ISCC string.
    :rtype: str
    :raises ValueError: If the scheme is not `iscc` or the string has more than one colon.
    """
    parts = [part.strip() for part in iscc.strip().split(":")]

    if len(parts) == 1:
        code = parts[0]
        # remove dashes if not multiformat; slicing keeps this safe for an
        # empty code, where indexing would raise IndexError
        if code[:1] not in list(MULTIBASE):
            code = code.replace("-", "")
        return code

    if len(parts) == 2:
        scheme, code = parts
        if scheme.lower() != "iscc":
            raise ValueError(f"Invalid scheme: {scheme}")
        return code.replace("-", "")

    raise ValueError(f"Malformed ISCC string: {iscc}")

ISCC - Codec#

Codec Overview#

Codec Functions#

encode_component(mtype, stype, version, bit_length, digest) #

encode_header(mtype, stype, version = 0, length = 1) #

decode_header(data) #

encode_varnibble(n) #

decode_varnibble(b) #

encode_units(units) #

decode_units(unit_id) #

encode_length(mtype, length) #

decode_length(mtype, length, subtype = None) #

encode_base32(data) #

decode_base32(code) #

iscc_decompose(iscc_code) #

iscc_normalize(iscc_code) #

Alternate Encodings#

encode_base64(data) #

decode_base64(code) #

encode_base32hex(data) #

decode_base32hex(code) #

normalize_multiformat(iscc_code) #

Helper Functions#

iscc_decode(iscc) #

iscc_explain(iscc) #

iscc_type_id(iscc) #

iscc_validate(iscc, strict = True) #

iscc_validate_mf(iscc, strict = True) #

iscc_clean(iscc) #

`encode_component(mtype, stype, version, bit_length, digest)` #

`encode_header(mtype, stype, version = 0, length = 1)` #

`decode_header(data)` #

`encode_varnibble(n)` #

`decode_varnibble(b)` #

`encode_units(units)` #

`decode_units(unit_id)` #

`encode_length(mtype, length)` #

`decode_length(mtype, length, subtype = None)` #

`encode_base32(data)` #

`decode_base32(code)` #

`iscc_decompose(iscc_code)` #

`iscc_normalize(iscc_code)` #

`encode_base64(data)` #

`decode_base64(code)` #

`encode_base32hex(data)` #

`decode_base32hex(code)` #

`normalize_multiformat(iscc_code)` #

`iscc_decode(iscc)` #

`iscc_explain(iscc)` #

`iscc_type_id(iscc)` #

`iscc_validate(iscc, strict = True)` #

`iscc_validate_mf(iscc, strict = True)` #

`iscc_clean(iscc)` #