# Prerequisite (run in a terminal, not inside Python): pip install pandas
import os
import torch
import clip
from PIL import Image
import pandas as pd
# Encode every .jpg/.png image in a folder with CLIP and export the feature
# vectors to an Excel sheet, one column per image (column name = filename).

# Load the pretrained CLIP model, on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Image folder path.
folder_path = '/path/to/folder'  # replace with the actual image folder path

# Feature vectors keyed by filename. The DataFrame is built once from this
# dict after the loop: inserting one column per iteration into an existing
# DataFrame triggers pandas' "DataFrame is highly fragmented"
# PerformanceWarning and is slow.
features_by_file = {}

# Walk the image folder.
for filename in os.listdir(folder_path):
    # Only .jpg and .png files are processed (the match is case-sensitive).
    if not filename.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(folder_path, filename)

    # Load the image; the context manager releases the file handle as soon
    # as the pixel data has been converted to an in-memory RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    # NOTE(review): the image is shrunk to 28x28 before being handed to
    # CLIP's `preprocess`; this looks lossy - confirm it is intended.
    image_resized = image.resize((28, 28))
    image_input = preprocess(image_resized).unsqueeze(0).to(device)

    # Encode the image without tracking gradients.
    with torch.no_grad():
        image_features = model.encode_image(image_input)

    # Drop the batch dimension and store the vector as a plain list.
    features_by_file[filename] = image_features.squeeze().tolist()

# One column per image, created in a single step.
df = pd.DataFrame(features_by_file)

# Append an all-zero label column as the last column.
df['label'] = 0

# Save the DataFrame to an Excel file.
# Excel sheets hold at most 16384 columns, so this one-column-per-image
# layout only works for folders with fewer images than that.
output_path = '/path/to/output.xlsx'  # replace with the actual output path
df.to_excel(output_path, index=False)
/home/wangzhenkuan/CLIP_image_encoder.py:33: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df[filename] = image_features_list
/home/wangzhenkuan/CLIP_image_encoder.py:33: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df[filename] = image_features_list
/home/wangzhenkuan/CLIP_image_encoder.py:33: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df[filename] = image_features_list
/home/wangzhenkuan/CLIP_image_encoder.py:33: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df[filename] = image_features_list
import os
import torch
import clip
from PIL import Image
import pandas as pd
# Encode every .jpg/.png image in a folder with CLIP and export the feature
# vectors to an Excel sheet, one (integer-named) column per image.

# Load the pretrained CLIP model, on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Image folder path.
folder_path = 'segment-anything-main/notebooks/output/'  # replace with the actual image folder path

# One feature vector (a plain list of floats) per processed image.
feature_list = []

# Walk the image folder.
for filename in os.listdir(folder_path):
    # Only .jpg and .png files are processed (the match is case-sensitive).
    if not filename.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(folder_path, filename)

    # Load the image; the context manager releases the file handle as soon
    # as the pixel data has been converted to an in-memory RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    # NOTE(review): the image is shrunk to 28x28 before being handed to
    # CLIP's `preprocess`; this looks lossy - confirm it is intended.
    image_resized = image.resize((28, 28))
    image_input = preprocess(image_resized).unsqueeze(0).to(device)

    # Encode the image without tracking gradients.
    with torch.no_grad():
        image_features = model.encode_image(image_input)

    # Store the vector. The previous version computed it but never appended
    # it, so the exported sheet contained no features at all.
    feature_list.append(image_features.squeeze().tolist())

# Build the frame in one step (rows = images) and transpose it so that each
# image becomes one column.
df = pd.DataFrame(feature_list).T

# Append an all-zero label column as the last column.
df['label'] = 0

# Save the DataFrame to an Excel file.
# Excel sheets hold at most 16384 columns; with one column per image,
# `to_excel` raises "ValueError: This sheet is too large!" for bigger folders.
output_path = 'output_negtive.xlsx'  # replace with the actual output path
df.to_excel(output_path, index=False)
Traceback (most recent call last):
File "/home/wangzhenkuan/CLIP_image_encoder.py", line 43, in <module>
df.to_excel(output_path, index = False)
File "/home/miniconda3/envs/wzk_base/lib/python3.10/site-packages/pandas/core/generic.py", line 2252, in to_excel
File "/home/miniconda3/envs/wzk_base/lib/python3.10/site-packages/pandas/io/formats/excel.py", line 923, in write
raise ValueError(
ValueError: This sheet is too large! Your sheet size is: 512, 292549 Max sheet size is: 1048576, 16384
import os
import torch
import clip
from PIL import Image
import pandas as pd
# Encode every .jpg/.png image in a folder with CLIP and export the feature
# vectors to a CSV file, one (integer-named) column per image.

# Load the pretrained CLIP model, on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Image folder path.
folder_path = 'segment-anything-main/notebooks/output/'  # replace with the actual image folder path

# One feature vector (a plain list of floats) per processed image.
feature_list = []

# Walk the image folder.
for filename in os.listdir(folder_path):
    # Only .jpg and .png files are processed (the match is case-sensitive).
    if not filename.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(folder_path, filename)

    # Load the image; the context manager releases the file handle as soon
    # as the pixel data has been converted to an in-memory RGB copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    # NOTE(review): the image is shrunk to 28x28 before being handed to
    # CLIP's `preprocess`; this looks lossy - confirm it is intended.
    image_resized = image.resize((28, 28))
    image_input = preprocess(image_resized).unsqueeze(0).to(device)

    # Encode the image without tracking gradients.
    with torch.no_grad():
        image_features = model.encode_image(image_input)

    # Store the vector. The previous version computed it but never appended
    # it, so the exported file contained no features at all.
    feature_list.append(image_features.squeeze().tolist())

# Build the frame in one step (rows = images) and transpose it so that each
# image becomes one column.
df = pd.DataFrame(feature_list).T

# Append an all-zero label column as the last column.
df['label'] = 0

# Save the DataFrame as CSV. `to_excel` cannot write a ".csv" path (it fails
# with "ValueError: No engine for filetype: 'csv'"), so `to_csv` is used.
output_path = 'output_negtive.csv'
df.to_csv(output_path, index=False)
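出现 “ValueError: No engine for filetype: ‘csv’” 错误的原因是:代码调用了 `df.to_excel()`,而输出路径的扩展名却是 `.csv`。`to_excel` 只能写入 Excel 格式的文件,并根据扩展名选择写入引擎,因此找不到可以处理 csv 的引擎。这个错误与 Pandas 版本较旧或缺少依赖项无关。
要输出 CSV 文件,请把 `df.to_excel(output_path, index=False)` 改为 `df.to_csv(output_path, index=False)`,无需更新或重新安装 Pandas。
另外,您也可以继续使用 `to_excel`,并将输出文件类型改回 Excel (.xlsx) 格式;但请注意,Excel 工作表最多只有 1048576 行、16384 列,数据超过这个范围时仍会出现 “This sheet is too large!” 错误。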
import os
import torch
import clip
from PIL import Image
import pandas as pd
# Encode every .jpg/.png image in a folder with CLIP and export the feature
# vectors in batches, one Excel workbook per batch and one (integer-named)
# column per image, so each sheet stays within Excel's column limit.

# Load the pretrained CLIP model, on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Image folder path.
folder_path = 'segment-anything-main/notebooks/output/'  # replace with the actual image folder path

# Batch size and Excel workbook naming.
batch_size = 10000  # number of images per batch
excel_prefix = 'output_batch_'  # filename prefix of the generated workbooks

# Collect the image files (case-sensitive .jpg/.png match).
image_files = [filename for filename in os.listdir(folder_path) if filename.endswith(('.jpg', '.png'))]

# Number of batches, rounded up. Plain `len // batch_size + 1` produced an
# extra, empty batch (and an empty workbook) whenever the file count was an
# exact multiple of `batch_size`, including zero files.
num_batches = (len(image_files) + batch_size - 1) // batch_size

for batch_idx in range(num_batches):
    # Slice out the files belonging to the current batch.
    start_idx = batch_idx * batch_size
    end_idx = min(start_idx + batch_size, len(image_files))
    batch_files = image_files[start_idx:end_idx]

    # One feature vector (a plain list of floats) per image in this batch.
    feature_list = []

    # Encode every image of the current batch.
    for filename in batch_files:
        image_path = os.path.join(folder_path, filename)

        # Load the image; the context manager releases the file handle as
        # soon as the pixel data has been converted to an in-memory RGB copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        # NOTE(review): the image is shrunk to 28x28 before being handed to
        # CLIP's `preprocess`; this looks lossy - confirm it is intended.
        image_resized = image.resize((28, 28))
        image_input = preprocess(image_resized).unsqueeze(0).to(device)

        # Encode the image without tracking gradients.
        with torch.no_grad():
            image_features = model.encode_image(image_input)

        # Store the vector. The previous version computed it but never
        # appended it, so every workbook was exported without features.
        feature_list.append(image_features.squeeze().tolist())

    # Build the frame in one step (rows = images) and transpose it so that
    # each image becomes one column.
    df = pd.DataFrame(feature_list).T

    # Append an all-zero label column as the last column.
    df['label'] = 0

    # Write the workbook for the current batch into the image folder.
    excel_filename = f"{excel_prefix}{batch_idx + 1}.xlsx"
    output_path = os.path.join(folder_path, excel_filename)
    df.to_excel(output_path, index=False)
    print(f"Batch {batch_idx + 1} processed. Excel file saved: {excel_filename}")