Exabel · ivarurdalen · Oct 18, 2024 · Oct 18, 2024 · Oct 18, 2024 · Oct 18, 2024
diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
@@ -29,7 +29,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
       fail-fast: true
 
     steps:

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -39,7 +39,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
       fail-fast: true
 
     steps:

diff --git a/.github/workflows/test_installability.yml b/.github/workflows/test_installability.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
       fail-fast: true
 
     steps:

diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.8.18
+3.9.20
diff --git a/Dockerfile b/Dockerfile
diff --git a/Jenkinsfile b/Jenkinsfile
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-5.3.0
+5.4.0
diff --git a/exabel_data_sdk/client/api/data_classes/data_set.py b/exabel_data_sdk/client/api/data_classes/data_set.py
@@ -81,3 +81,23 @@ def __lt__(self, other: object) -> bool:
         if not isinstance(other, DataSet):
             raise ValueError(f"Cannot compare DataSet to non-DataSet: {other}")
         return self.name < other.name
+
+
+def print_data_set(data_set: DataSet) -> None:
+    """
+    Print a human-readable summary of a data set to standard output.
+
+    The resource name, display name and description are printed first, followed by the
+    data set's signals in sorted order, one per line.
+    """
+    summary_lines = (
+        f"Resource name:\t{data_set.name}",
+        f"Display name:\t{data_set.display_name}",
+        f"Description:\t{data_set.description}",
+        "Signals:",
+    )
+    for line in summary_lines:
+        print(line)
+    # Sort the signals so the listing is stable regardless of their stored order.
+    for signal_name in sorted(data_set.signals):
+        print(f"\t{signal_name}")
diff --git a/exabel_data_sdk/scripts/check_company_identifiers_in_csv.py b/exabel_data_sdk/scripts/check_company_identifiers_in_csv.py
@@ -32,6 +32,13 @@ def __init__(self, argv: Sequence[str]):
             type=str,
             help="The type of the identifiers to look up. Defaults to the same as the column name.",
         )
+        self.parser.add_argument(
+            "--entity-type",
+            type=str,
+            choices=["company", "security"],
+            default="company",
+            help="Specify the entity type, either 'company' or 'security' (defaults to 'company')",
+        )
         self.parser.add_argument(
             "--print-all-identifiers",
             action="store_true",
@@ -40,6 +47,11 @@ def __init__(self, argv: Sequence[str]):
                 "company."
             ),
         )
+        self.parser.add_argument(
+            "--output-file",
+            type=str,
+            help="The file to write the output to (defaults to stdout).",
+        )
 
     def _get_identifier_type(
         self, identifier_column: str, identifier_type: Optional[str] = None
@@ -81,16 +93,20 @@ def _process_entity_resource_names(
         return df
 
     def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
-        data_frame = self.read_csv(args, string_columns=[args.identifier_column])
-        data_frame = data_frame[[args.identifier_column]]
-        identifiers = data_frame[args.identifier_column].drop_duplicates()
+        df = self.read_csv(args, string_columns=[args.identifier_column]).dropna(
+            subset=[args.identifier_column]
+        )
         identifier_type = self._get_identifier_type(args.identifier_column, args.identifier_type)
-        identifiers = identifiers.rename(identifier_type)
+        df.rename(columns={args.identifier_column: identifier_type}, inplace=True)
+        identifiers = df[identifier_type].drop_duplicates()
         entity_resource_names = to_entity_resource_names(
-            client.entity_api, identifiers=identifiers, check_entity_types=False
+            client.entity_api,
+            identifiers=identifiers,
+            check_entity_types=False,
+            entity_type=args.entity_type,
         )
         checked_data_frame = self._process_entity_resource_names(
-            entity_resource_names, identifier_type, args.print_all_identifiers
+            entity_resource_names, identifier_type, keep_all_identifiers=args.print_all_identifiers
         )
         with pd.option_context(
             "display.max_rows", None, "display.max_columns", None, "display.width", None
@@ -101,6 +117,11 @@ def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
                 f"identifiers successfully mapped to companies out of {len(identifiers)} "
                 "identifiers in total"
             )
+        if args.output_file:
+            df.rename(columns={args.identifier_column: identifier_type}, inplace=True)
+            result_df = df.merge(checked_data_frame, how="left", on=identifier_type)
+            result_df.to_csv(args.output_file, index=False)
+            print(f"Identifier mappings written to {args.output_file}")
 
 
 if __name__ == "__main__":

diff --git a/exabel_data_sdk/scripts/create_data_set.py b/exabel_data_sdk/scripts/create_data_set.py
@@ -0,0 +1,78 @@
+import argparse
+import sys
+from typing import Sequence
+
+from exabel_data_sdk import ExabelClient
+from exabel_data_sdk.client.api.data_classes.data_set import DataSet, print_data_set
+from exabel_data_sdk.scripts import utils
+from exabel_data_sdk.scripts.base_script import BaseScript
+
+
+class CreateDataSet(BaseScript):
+    """
+    Create a data set.
+
+    The signals of the new data set can be given either directly on the command line with
+    --signals or in a plain text file with --signals-file, but not both. If neither is given,
+    the data set is created without any signals.
+    """
+
+    def __init__(self, argv: Sequence[str], description: str):
+        super().__init__(argv, description)
+        self.parser.add_argument(
+            "--name",
+            required=True,
+            type=utils.data_set_resource_name,
+            help="The resource name of the new data set, for example 'dataSets/ns.transactions'.",
+        )
+        self.parser.add_argument(
+            "--display-name",
+            required=True,
+            type=str,
+            help="The display name of the data set",
+        )
+        self.parser.add_argument(
+            "--description",
+            required=False,
+            type=str,
+            default="",
+            help="One or more paragraphs of text description",
+        )
+        # At most one of the two signal sources may be given.
+        signals_group = self.parser.add_mutually_exclusive_group()
+        signals_group.add_argument(
+            "--signals",
+            required=False,
+            type=utils.signal_resource_name,
+            nargs="+",
+            help="Resource names of signals which should be included.",
+        )
+        signals_group.add_argument(
+            "--signals-file",
+            required=False,
+            type=str,
+            help="A plain text file to read signals from, with one signal resource name per line.",
+        )
+
+    def run_script(self, client: ExabelClient, args: argparse.Namespace) -> None:
+        """Create the data set described by the parsed arguments and print the result."""
+        signals: Sequence[str] = []
+        if args.signals:
+            signals = args.signals
+        elif args.signals_file:
+            signals = utils.read_signals_from_file(args.signals_file)
+
+        data_set = DataSet(
+            name=args.name,
+            display_name=args.display_name,
+            description=args.description,
+            # Drop duplicate signals while keeping the order in which they were given;
+            # going through a set would make the order arbitrary between runs.
+            signals=list(dict.fromkeys(signals)),
+        )
+        result = client.data_set_api.create_data_set(data_set)
+        print_data_set(result)
+
+
+if __name__ == "__main__":
+    CreateDataSet(sys.argv, "Create a data set.").run()