Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions HISTORY.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ Changelog
==========


13.2.2 (2024-11-04)
---------------------

* Initial release for DSS 13.2.2

13.2.1 (2024-10-17)
---------------------

Expand Down
42 changes: 39 additions & 3 deletions dataikuapi/dss/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,17 @@ def add_text(self, text):
self.eq["queries"].append({"text": text})
return self

def add_image(self, image_base64):
def add_image(self, image):
"""
Add an image to the embedding query.

:param str image_base64: Image content, as a base 64 formatted string.
:param image: Image content as bytes or str (base64)
"""
self.eq["queries"].append({"inlineImage": image_base64})
if isinstance(image, str):
self.eq["queries"].append({"inlineImage": image})
elif isinstance(image, bytes):
import base64
self.eq["queries"].append({"inlineImage": base64.b64encode(image).decode("utf8")})
return self

def execute(self):
Expand Down Expand Up @@ -608,6 +612,20 @@ def with_mask(self, mode, image=None, text=None):
self.gq["maskImage"] = base64.b64encode(image).decode("utf8")
return self

@property
def inference_steps(self):
return self.gq.get("nbInferenceSteps", None)
@inference_steps.setter
def inference_steps(self, new_value):
self.gq["nbInferenceSteps"] = new_value

@property
def refiner_strength(self):
return self.gq.get("refinerStrength", None)
@refiner_strength.setter
def refiner_strength(self, new_value):
self.gq["refinerStrength"] = new_value

@property
def height(self):
return self.gq.get("height", None)
Expand Down Expand Up @@ -710,3 +728,21 @@ def first_image(self, as_type="bytes"):

else:
return self._raw["images"][0]["data"]

def get_images(self, as_type="bytes"):
"""
:return: The generated images.
:rtype: List[str]
"""

if not self.success:
raise Exception("Image generation did not succeed: %s" % self._raw["errorMessage"])

if len(self._raw["images"]) == 0:
raise Exception("Image generation succeeded but did not return any image")

if as_type == "bytes":
import base64
return [base64.b64decode(image["data"]) for image in self._raw["images"]]
else:
return [image["data"] for image in self._raw["images"]]
21 changes: 16 additions & 5 deletions dataikuapi/dss/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -4757,7 +4757,8 @@ def deploy_to_flow(self, model_id, model_name, train_dataset, test_dataset=None,
"POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/deployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
body=obj)

def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, activate=True):
def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, activate=True,
redo_optimization=False, redo_threshold_optimization=True, fixed_threshold=None):
"""
Redeploys a trained model from this ML Task to an existing saved model and training recipe in the flow.

Expand All @@ -4769,17 +4770,27 @@ def redeploy_to_flow(self, model_id, recipe_name=None, saved_model_id=None, acti
:param saved_model_id: Name of the saved model to update (defaults to **None**)
:type saved_model_id: str, optional
:param bool activate: If True (default), make the newly deployed model version become the active version
:param bool redo_optimization: Whether to re-run the model optimization (hyperparameter search) on every train
:param bool redo_threshold_optimization: Whether to redo the model threshold Optimization on every train (for binary classification models)
:param fixed_threshold: Value to use as fixed threshold. Must be set if redoThresholdOptimization is False (for binary classification models)
:type fixed_threshold: float, optional
:return: A dict containing: "impactsDownstream" - whether the active saved mode version changed and downstream recipes are impacted
:rtype: dict
"""
obj = {
"recipeName" : recipe_name,
"savedModelId" : saved_model_id,
"activate" : activate
"recipeName": recipe_name,
"savedModelId": saved_model_id,
"activate": activate,
"redoOptimization": redo_optimization,
"redoThresholdOptimization": redo_threshold_optimization
}

if fixed_threshold is not None:
obj["fixedThreshold"] = fixed_threshold

return self.client._perform_json(
"POST", "/projects/%s/models/lab/%s/%s/models/%s/actions/redeployToFlow" % (self.project_key, self.analysis_id, self.mltask_id, model_id),
body = obj)
body=obj)

def remove_unused_splits(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion dataikuapi/dss/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def compute_schema_updates(self):
Computes which updates are required to the outputs of this recipe.

This method only computes which changes would be needed to make the schema of the outputs
of the reicpe match the actual schema that the recipe will produce. To effectively apply
of the recipe match the actual schema that the recipe will produce. To effectively apply
these changes to the outputs, you can use the :meth:`~RequiredSchemaUpdates.apply()` on
the returned object.

Expand Down
28 changes: 26 additions & 2 deletions dataikuapi/dss_plugin_mlflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,16 @@ def __init__(self, client, project, managed_folder, host=None):
)
sys.path.insert(0, self.tempdir)

# Reload the artifact_repository_registry in case MLflow was imported beforehand
if sys.version_info > (3, 4):
from importlib import reload
import mlflow
reload(mlflow.store.artifact.artifact_repository_registry)
import mlflow.store.artifact.artifact_repository_registry
import mlflow.tracking.request_header.registry
reload(mlflow.tracking.request_header.registry) # Reload the request_header registry in case MLflow was imported beforehand
reload(mlflow.store.artifact.artifact_repository_registry) # Reload the artifact_repository_registry in case MLflow was imported beforehand
mlflow.set_tracking_uri(None) # if user has changed tracking backend manually before
mlflow.end_run() # if user already created a run with another tracking backend
self.remove_dataiku_duplicates_in_request_header_registry()

# Setup authentication
if client._session.auth is not None:
Expand Down Expand Up @@ -114,6 +117,27 @@ def __init__(self, client, project, managed_folder, host=None):
})
self.override_env(self.mlflow_env)


def remove_dataiku_duplicates_in_request_header_registry(self):
try:
# Try to make sure we are not loading our MLflow plugin twice
# see https://app.shortcut.com/dataiku/story/210232
from mlflow.tracking.request_header.registry import _request_header_provider_registry
registry = _request_header_provider_registry._registry
seen = set()
i = 0
while i < len(registry):
entrypoint = registry[i]
entrypoint_name = "{}.{}".format(entrypoint.__class__.__module__, entrypoint.__class__.__name__)
if entrypoint_name in seen and 'dataikuapi' in entrypoint_name:
registry.pop(i)
else:
seen.add(entrypoint_name)
i += 1
except Exception as e:
logging.warning(str(e), exc_info=True)


def clear(self):
shutil.rmtree(self.tempdir)
self.restore_env(self.mlflow_env)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from setuptools import setup

VERSION = "13.2.1"
VERSION = "13.2.2"

long_description = (open('README').read() + '\n\n' +
open('HISTORY.txt').read())
Expand Down