dataiku · kevanescence · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/HISTORY.txt b/HISTORY.txt
@@ -2,6 +2,11 @@ Changelog
 ==========
 
 
+13.2.2 (2024-11-04)
+---------------------
+
+* Initial release for DSS 13.2.2
+
 13.2.1 (2024-10-17)
 ---------------------
 

diff --git a/dataikuapi/dss/llm.py b/dataikuapi/dss/llm.py
@@ -150,13 +150,17 @@ def add_text(self, text):
         self.eq["queries"].append({"text": text})
         return self
 
-    def add_image(self, image_base64):
+    def add_image(self, image):
         """
         Add an image to the embedding query.
 
-        :param str image_base64: Image content, as a base 64 formatted string.
+        :param image: Image content as bytes or str (base64)
         """
-        self.eq["queries"].append({"inlineImage": image_base64})
+        if isinstance(image, str):
+            self.eq["queries"].append({"inlineImage": image})
+        elif isinstance(image, bytes):
+            import base64
+            self.eq["queries"].append({"inlineImage": base64.b64encode(image).decode("utf8")})
         return self
 
     def execute(self):
@@ -608,6 +612,20 @@ def with_mask(self, mode, image=None, text=None):
                 self.gq["maskImage"] = base64.b64encode(image).decode("utf8")
         return self
 
+    @property
+    def inference_steps(self):
+        return self.gq.get("nbInferenceSteps", None)
+    @inference_steps.setter
+    def inference_steps(self, new_value):
+        self.gq["nbInferenceSteps"] = new_value
+
+    @property
+    def refiner_strength(self):
+        return self.gq.get("refinerStrength", None)
+    @refiner_strength.setter
+    def refiner_strength(self, new_value):
+        self.gq["refinerStrength"] = new_value
+
     @property
     def height(self):
         return self.gq.get("height", None)
@@ -710,3 +728,21 @@ def first_image(self, as_type="bytes"):
 
         else:
             return self._raw["images"][0]["data"]
+
+    def get_images(self, as_type="bytes"):
+        """
+        :return: The generated images.
+        :rtype: List[str]
+        """
+
+        if not self.success:
+            raise Exception("Image generation did not succeed: %s" % self._raw["errorMessage"])
+
+        if len(self._raw["images"]) == 0:
+            raise Exception("Image generation succeeded but did not return any image")
+
+        if as_type == "bytes":
+            import base64
+            return [base64.b64decode(image["data"]) for image in self._raw["images"]]
+        else:
+            return [image["data"] for image in self._raw["images"]]
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -4757,7 +4757,8 @@ def deploy_to_flow(self, model_id, model_name, train_dataset, test_dataset=None,
             "POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/deployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
             body=obj)
 
-    def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, activate=True):
+    def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, activate=True,
+                         redo_optimization=False, redo_threshold_optimization=True, fixed_threshold=None):
         """
         Redeploys a trained model from this ML Task to an existing saved model and training recipe in the flow.
 
@@ -4769,17 +4770,27 @@ def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, acti
         :param saved_model_id: Name of the saved model to update (defaults to **None**)
         :type saved_model_id: str, optional
         :param bool activate: If True (default), make the newly deployed model version become the active version
+        :param bool redo_optimization: Whether to re-run the model optimization (hyperparameter search) on every train
+        :param bool redo_threshold_optimization: Whether to redo the model threshold Optimization on every train (for binary classification models)
+        :param fixed_threshold: Value to use as fixed threshold. Must be set if redoThresholdOptimization is False (for binary classification models)
+        :type fixed_threshold: float, optional
         :return: A dict containing: "impactsDownstream" - whether the active saved mode version changed and downstream recipes are impacted
         :rtype: dict
         """
         obj = {
-            "recipeName" : recipe_name,
-            "savedModelId" : saved_model_id,
-            "activate" : activate
+            "recipeName": recipe_name,
+            "savedModelId": saved_model_id,
+            "activate": activate,
+            "redoOptimization": redo_optimization,
+            "redoThresholdOptimization": redo_threshold_optimization
         }
+
+        if fixed_threshold is not None:
+            obj["fixedThreshold"] = fixed_threshold
+
         return self.client._perform_json(
             "POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/redeployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
-            body = obj)
+            body=obj)
 
     def remove_unused_splits(self):
         """

diff --git a/dataikuapi/dss/recipe.py b/dataikuapi/dss/recipe.py
@@ -96,7 +96,7 @@ def compute_schema_updates(self):
         Computes which updates are required to the outputs of this recipe.
 
         This method only computes which changes would be needed to make the schema of the outputs
-        of the reicpe match the actual schema that the recipe will produce. To effectively apply
+        of the recipe match the actual schema that the recipe will produce. To effectively apply
         these changes to the outputs, you can use the :meth:`~RequiredSchemaUpdates.apply()` on
         the returned object.
 

diff --git a/dataikuapi/dss_plugin_mlflow/utils.py b/dataikuapi/dss_plugin_mlflow/utils.py
@@ -42,13 +42,16 @@ def __init__(self, client, project, managed_folder, host=None):
             )
         sys.path.insert(0, self.tempdir)
 
-        # Reload the artifact_repository_registry in case MLflow was imported beforehand
         if sys.version_info > (3, 4):
             from importlib import reload
         import mlflow
-        reload(mlflow.store.artifact.artifact_repository_registry)
+        import mlflow.store.artifact.artifact_repository_registry
+        import mlflow.tracking.request_header.registry
+        reload(mlflow.tracking.request_header.registry) # Reload the request_header registry in case MLflow was imported beforehand
+        reload(mlflow.store.artifact.artifact_repository_registry) # Reload the artifact_repository_registry in case MLflow was imported beforehand
         mlflow.set_tracking_uri(None)  # if user has changed tracking backend manually before
         mlflow.end_run()  # if user already created a run with another tracking backend
+        self.remove_dataiku_duplicates_in_request_header_registry()
 
         # Setup authentication
         if client._session.auth is not None:
@@ -114,6 +117,27 @@ def __init__(self, client, project, managed_folder, host=None):
         })
         self.override_env(self.mlflow_env)
 
+
+    def remove_dataiku_duplicates_in_request_header_registry(self):
+        try:
+            # Try to make sure we are not loading our MLflow plugin twice
+            # see https://app.shortcut.com/dataiku/story/210232
+            from mlflow.tracking.request_header.registry import _request_header_provider_registry
+            registry = _request_header_provider_registry._registry
+            seen = set()
+            i = 0
+            while i < len(registry):
+                entrypoint = registry[i]
+                entrypoint_name = "{}.{}".format(entrypoint.__class__.__module__, entrypoint.__class__.__name__)
+                if entrypoint_name in seen and 'dataikuapi' in entrypoint_name:
+                    registry.pop(i)
+                else:
+                    seen.add(entrypoint_name)
+                    i += 1
+        except Exception as e:
+            logging.warning(str(e), exc_info=True)
+
+
     def clear(self):
         shutil.rmtree(self.tempdir)
         self.restore_env(self.mlflow_env)

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 from setuptools import setup
 
-VERSION = "13.2.1"
+VERSION = "13.2.2"
 
 long_description = (open('README').read() + '\n\n' +
                     open('HISTORY.txt').read())