feast/sdk/python/feast/image_utils.py at feast-dataframe-phase2 · feast-dev/feast

History

272 lines (225 loc) · 9.03 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

"""

Image processing utilities for Feast image search capabilities.

Provides image embedding generation and combination functions for multi-modal search.

"""

import io

from typing import List

try:

import timm

import torch

from PIL import Image

from sklearn.preprocessing import normalize

from timm.data import resolve_data_config

from timm.data.transforms_factory import create_transform

_image_dependencies_available = True

except ImportError:

_image_dependencies_available = False

# Strategy names understood by combine_embeddings(); the list is also echoed
# in that function's error message when an unknown strategy is requested.
COMBINATION_STRATEGIES = ["weighted_sum", "concatenate", "average"]

def _check_image_dependencies():
    """
    Ensure the optional image-processing packages were imported successfully.

    Raises:
        ImportError: If the module-level import of the image dependencies
            failed (the availability flag is False).
    """
    if _image_dependencies_available:
        return

    raise ImportError(
        "Image processing dependencies are not installed. "
        "Please install with: pip install feast[image]"
    )

class ImageFeatureExtractor:
    """
    Extract image embeddings using pre-trained vision models.

    This class uses timm (PyTorch Image Models) to generate embeddings
    from images using pre-trained vision models like ResNet, ViT, etc.

    Examples:
        Basic usage::

            extractor = ImageFeatureExtractor()
            with open("image.jpg", "rb") as f:
                image_bytes = f.read()
            embedding = extractor.extract_embedding(image_bytes)

        Using different models::

            # ResNet-50
            extractor = ImageFeatureExtractor("resnet50")
            embedding = extractor.extract_embedding(image_bytes)

            # ViT model
            extractor = ImageFeatureExtractor("vit_base_patch16_224")
            embedding = extractor.extract_embedding(image_bytes)
    """

    def __init__(self, model_name: str = "resnet34"):
        """
        Initialize with a pre-trained model.

        Args:
            model_name: Model name from timm library. Popular choices:
                - "resnet34": Fast, good for general use (default)
                - "resnet50": Better accuracy than ResNet-34
                - "vit_base_patch16_224": Vision Transformer, high accuracy
                - "efficientnet_b0": Good balance of speed and accuracy
                - "mobilenetv3_large_100": Fast inference for mobile/edge

        Raises:
            ImportError: If image processing dependencies are not installed
            RuntimeError: If the specified model cannot be loaded
        """
        _check_image_dependencies()

        try:
            self.model_name = model_name
            # num_classes=0 with global_pool="avg" is requested so the model
            # output is used directly as the embedding vector.
            self.model = timm.create_model(
                model_name, pretrained=True, num_classes=0, global_pool="avg"
            )
            self.model.eval()

            # Pass the model object (not its name): timm resolves the
            # preprocessing settings from the model's own config, which a
            # plain string does not carry, so a name would silently fall
            # back to library defaults.
            config = resolve_data_config({}, model=self.model)
            self.preprocess = create_transform(**config)
        except Exception as e:
            raise RuntimeError(f"Failed to load model '{model_name}': {e}") from e

    def _load_tensor(self, image_bytes: bytes):
        """Decode image bytes, convert to RGB and apply the model transform."""
        with Image.open(io.BytesIO(image_bytes)) as raw_image:
            # convert() returns a new, fully loaded image, so the source
            # handle can be closed as soon as the block exits.
            rgb_image = raw_image.convert("RGB")
        return self.preprocess(rgb_image)

    def extract_embedding(self, image_bytes: bytes) -> List[float]:
        """
        Extract embedding from image bytes.

        Args:
            image_bytes: Image data as bytes (JPEG, PNG, WebP, etc.)

        Returns:
            Normalized (L2) embedding vector as list of floats

        Raises:
            ValueError: If image cannot be processed or is invalid
        """
        try:
            # unsqueeze(0) adds the leading batch dimension for a single image.
            input_tensor = self._load_tensor(image_bytes).unsqueeze(0)

            with torch.no_grad():
                output = self.model(input_tensor)

            feature_vector = output.squeeze().numpy()
            normalized = normalize(feature_vector.reshape(1, -1), norm="l2")
            return normalized.flatten().tolist()
        except Exception as e:
            raise ValueError(f"Failed to extract embedding from image: {e}") from e

    def batch_extract_embeddings(
        self, image_bytes_list: List[bytes]
    ) -> List[List[float]]:
        """
        Extract embeddings from multiple images in batch for efficiency.

        Args:
            image_bytes_list: List of image data as bytes

        Returns:
            List of normalized (L2) embedding vectors, one per input image and
            in the same order. An empty input yields an empty list.

        Raises:
            ValueError: If any image cannot be processed
        """
        tensors = []
        for image_bytes in image_bytes_list:
            try:
                tensors.append(self._load_tensor(image_bytes))
            except Exception as e:
                raise ValueError(f"Failed to preprocess image: {e}") from e

        # torch.stack() rejects an empty sequence, so short-circuit here.
        if not tensors:
            return []

        batch_tensor = torch.stack(tensors)

        with torch.no_grad():
            outputs = self.model(batch_tensor)

        # One row per image; normalize() scales every row independently,
        # which matches normalising each output vector on its own.
        features = outputs.reshape(len(tensors), -1).numpy()
        return normalize(features, norm="l2").tolist()

def _zero_pad_to_common_length(first, second):
    """Return both lists right-padded with 0.0 up to the longer one's length."""
    width = max(len(first), len(second))
    return (
        first + [0.0] * (width - len(first)),
        second + [0.0] * (width - len(second)),
    )


def combine_embeddings(
    text_embedding: List[float],
    image_embedding: List[float],
    strategy: str = "weighted_sum",
    text_weight: float = 0.5,
    image_weight: float = 0.5,
) -> List[float]:
    """
    Combine text and image embeddings into a single vector for search.

    This function provides several strategies for combining embeddings from
    different modalities (text and image) into a single vector for search.
    For the element-wise strategies ("weighted_sum" and "average"), the
    shorter embedding is right-padded with zeros to the length of the longer
    one before combining.

    Args:
        text_embedding: Text embedding vector (any sequence of floats)
        image_embedding: Image embedding vector (any sequence of floats)
        strategy: Combination strategy (default: "weighted_sum"). One of
            "weighted_sum", "concatenate" or "average".
        text_weight: Weight for text embedding (used by "weighted_sum" only)
        image_weight: Weight for image embedding (used by "weighted_sum" only)

    Returns:
        Combined embedding vector as a new list of floats

    Raises:
        ValueError: If weights don't sum to 1.0 for weighted_sum strategy,
            or if the strategy name is not recognised

    Examples:
        Weighted combination (emphasize image)::

            combined = combine_embeddings(
                [0.1, 0.2], [0.8, 0.9],  # text_emb, image_emb
                strategy="weighted_sum",
                text_weight=0.3, image_weight=0.7
            )

        Concatenation for full information::

            combined = combine_embeddings(
                [0.1, 0.2], [0.8, 0.9],  # text_emb, image_emb
                strategy="concatenate"
            )
    """
    if strategy == "weighted_sum":
        # Small tolerance so that e.g. 0.3 + 0.7 is accepted despite
        # floating-point rounding.
        if abs(text_weight + image_weight - 1.0) > 1e-6:
            raise ValueError(
                "text_weight + image_weight must equal 1.0 for weighted_sum"
            )

        # list() copies the inputs so tuples and other sequences are accepted.
        text_padded, image_padded = _zero_pad_to_common_length(
            list(text_embedding), list(image_embedding)
        )
        return [
            text_weight * t + image_weight * i
            for t, i in zip(text_padded, image_padded)
        ]

    if strategy == "concatenate":
        return list(text_embedding) + list(image_embedding)

    if strategy == "average":
        text_padded, image_padded = _zero_pad_to_common_length(
            list(text_embedding), list(image_embedding)
        )
        return [(t + i) / 2.0 for t, i in zip(text_padded, image_padded)]

    raise ValueError(
        f"Unknown combination strategy: {strategy}. "
        f"Supported strategies: {', '.join(COMBINATION_STRATEGIES)}"
    )

def validate_image_format(image_bytes: bytes) -> bool:
    """
    Report whether the given bytes can be opened and verified as an image.

    Any exception raised while opening or verifying the data is treated as
    "not a valid image" rather than propagated.

    Args:
        image_bytes: Image data as bytes

    Returns:
        True if valid image, False otherwise
    """
    try:
        buffer = io.BytesIO(image_bytes)
        with Image.open(buffer) as candidate:
            candidate.verify()
    except Exception:
        return False
    else:
        return True

def get_image_metadata(image_bytes: bytes) -> dict:
    """
    Extract metadata from image bytes.

    Args:
        image_bytes: Image data as bytes

    Returns:
        Dictionary with the keys "format", "mode", "width", "height" (read
        from the opened image) and "size_bytes" (length of the input bytes).

    Raises:
        ValueError: If image cannot be processed. The underlying error is
            attached as the exception's ``__cause__``.
    """
    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            return {
                "format": img.format,
                "mode": img.mode,
                "width": img.width,
                "height": img.height,
                "size_bytes": len(image_bytes),
            }
    except Exception as e:
        # Chain explicitly so the original failure stays visible to callers.
        raise ValueError(f"Failed to extract image metadata: {e}") from e

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

image_utils.py

Latest commit

History

image_utils.py

File metadata and controls