对二维数据任意位置的空缺值进行近邻填充。
代码实现如下，其中可以指定表示空缺的标记值，以及填充所需的最少非空近邻数量：
import numpy as np
# Fill cells marked as empty (e.g. -23750) with the mean of their neighbours.
def fill_empty_with_mean(arr, empty_value, min_neighbors):
    """Fill empty cells of a 2-D array with the mean of their valid neighbours.

    Cells are visited in row-major order. For each cell equal to
    ``empty_value``, the valid neighbours are the cells of the surrounding
    3x3 window that lie inside the array and are not ``empty_value``. The
    cell is overwritten with their mean when there are at least
    ``min_neighbors`` of them (and at least one).

    The array is modified in place, so a cell filled earlier in the scan is
    treated as a valid neighbour by cells visited later.

    Args:
        arr: 2-D NumPy array, modified in place. The mean is written back
            with plain item assignment, so an integer-typed array stores it
            converted to that integer type; pass a float array to keep
            fractional means.
        empty_value: Sentinel value that marks an empty cell.
        min_neighbors: Minimum number of valid neighbours required to fill.

    Returns:
        The same array object that was passed in.
    """
    rows, cols = arr.shape
    offsets = (-1, 0, 1)
    for i in range(rows):
        for j in range(cols):
            if arr[i, j] != empty_value:
                continue
            # Valid neighbours: inside the array and not empty. The centre
            # cell is excluded automatically because it is empty.
            neighbors = [
                arr[i + dx, j + dy]
                for dx in offsets
                for dy in offsets
                if 0 <= i + dx < rows
                and 0 <= j + dy < cols
                and arr[i + dx, j + dy] != empty_value
            ]
            # Require at least one neighbour so that min_neighbors <= 0 never
            # takes the mean of an empty list (which would write NaN).
            if neighbors and len(neighbors) >= min_neighbors:
                arr[i, j] = np.mean(neighbors)
    return arr
if __name__ == '__main__':
    # Sentinel marking an empty cell, and the fewest valid neighbours a cell
    # needs before it gets filled.
    empty_value = -23750
    min_neighbors = 3
    # Sample 4x5 grid; the single 1.0 literal makes the whole array float,
    # so the computed means are stored without truncation.
    array = np.array([
        [1, empty_value, 1, 1, 1],
        [1, empty_value, 1, 1, empty_value],
        [empty_value, 2, 9, -1, 1.0],
        [-1, -1, empty_value, 5, empty_value],
    ])
    # The function works in place, so print the same array afterwards.
    fill_empty_with_mean(array, empty_value, min_neighbors)
    print(array)
对二维数据任意位置的空缺值进行近邻填充(去掉边缘)。
代码实现如下，其中可以指定表示空缺的标记值，以及填充所需的最少非空近邻数量（默认为 5）：
import numpy as np
def fill_empty_with_mean(arr, empty_value, min_neighbors=5):
    """Fill empty interior cells of a 2-D array with their neighbours' mean.

    Only cells that are not on the outer border (second row to second-to-last
    row, second column to second-to-last column) are considered, so the full
    3x3 window around each of them is always inside the array. For each such
    cell equal to ``empty_value``, the non-empty values of its 3x3 window are
    collected; the cell is overwritten with their mean when there are at
    least ``min_neighbors`` of them (and at least one).

    The array is modified in place in row-major order, so a cell filled
    earlier is treated as a valid neighbour by cells visited later. Arrays
    with fewer than three rows or three columns have no interior cells and
    are returned unchanged.

    Args:
        arr: 2-D NumPy array, modified in place. The mean is written back
            with plain item assignment, so an integer-typed array stores it
            converted to that integer type; pass a float array to keep
            fractional means.
        empty_value: Sentinel value that marks an empty cell.
        min_neighbors: Minimum number of valid neighbours required to fill.

    Returns:
        The same array object that was passed in.
    """
    rows, cols = arr.shape
    offsets = (-1, 0, 1)
    for i in range(1, rows - 1):
        for j in range(1, cols - 1):
            if arr[i, j] != empty_value:
                continue
            # The centre cell is empty, so it never ends up in the list.
            neighbors = [
                arr[i + dx, j + dy]
                for dx in offsets
                for dy in offsets
                if arr[i + dx, j + dy] != empty_value
            ]
            # Require at least one neighbour so that min_neighbors <= 0 never
            # takes the mean of an empty list (which would write NaN).
            if neighbors and len(neighbors) >= min_neighbors:
                arr[i, j] = np.mean(neighbors)
    return arr
if __name__ == '__main__':
    # Sentinel marking an empty cell; an interior cell is filled only when at
    # least five of the values in its 3x3 window are valid.
    empty_value = -23750
    min_neighbors = 5
    # Sample 4x5 grid. The literals are all integers, so the dtype is forced
    # to float: otherwise a fractional mean would be converted to an integer
    # when it is written back into the array.
    array = np.array([[1, -23750, 1, 1, 1],
                      [1, -23750, 1, 1, -23750],
                      [-23750, 1, 7, -1, 1],
                      [-1, -1, -23750, 5, 6]], dtype=float)
    arr = fill_empty_with_mean(array, empty_value, min_neighbors)
    # Show the grid after filling.
    print(arr)
对二维数据边缘位置（四个角除外）的空缺值进行近邻填充的代码实现：
import numpy as np
def fill_edge_empty_with_mean(arr, empty_value, min_neighbors=5):
    """Fill empty border cells of a 2-D array with their neighbours' mean.

    Only cells on the outer border are considered, and the four corner cells
    are skipped. The border is processed in a fixed order: first row, last
    row, first column, last column. For each border cell equal to
    ``empty_value``, the non-empty values of the part of its 3x3 window that
    lies inside the array are collected; the cell is overwritten with their
    mean when there are at least ``min_neighbors`` of them (and at least one).

    The array is modified in place, so a cell filled earlier is treated as a
    valid neighbour by cells visited later.

    Args:
        arr: 2-D NumPy array, modified in place. The mean is written back
            with plain item assignment, so an integer-typed array stores it
            converted to that integer type; pass a float array to keep
            fractional means.
        empty_value: Sentinel value that marks an empty cell.
        min_neighbors: Minimum number of valid neighbours required to fill.

    Returns:
        The same array object that was passed in.
    """
    rows, cols = arr.shape
    if rows == 0 or cols == 0:
        # Nothing to fill, and indexing row/column 0 would fail.
        return arr

    def fill_cell(i, j):
        # Fill the single cell (i, j) in place if it is empty and has enough
        # valid neighbours.
        if arr[i, j] != empty_value:
            return
        # Clamp the 3x3 window to the array so single-row and single-column
        # arrays are handled instead of indexing out of range.
        neighbors = [
            arr[r, c]
            for r in range(max(i - 1, 0), min(i + 2, rows))
            for c in range(max(j - 1, 0), min(j + 2, cols))
            if arr[r, c] != empty_value
        ]
        # Require at least one neighbour so that min_neighbors <= 0 never
        # takes the mean of an empty list (which would write NaN).
        if neighbors and len(neighbors) >= min_neighbors:
            arr[i, j] = np.mean(neighbors)

    # First row, then last row (corners excluded).
    for j in range(1, cols - 1):
        fill_cell(0, j)
    for j in range(1, cols - 1):
        fill_cell(rows - 1, j)
    # First column, then last column (corners excluded).
    for i in range(1, rows - 1):
        fill_cell(i, 0)
    for i in range(1, rows - 1):
        fill_cell(i, cols - 1)
    return arr
if __name__ == '__main__':
    # Sentinel marking an empty cell; a border cell is filled only when at
    # least three valid values surround it.
    empty_value = -23750
    min_neighbors = 3
    # Sample 4x5 grid; the single 6.0 literal makes the whole array float,
    # so the computed means are stored without truncation.
    array = np.array([
        [1, empty_value, 1, 1, 1],
        [1, -1, 1, 1, empty_value],
        [empty_value, 1, 7, -1, 1],
        [-1, -1, empty_value, 5, 6.0],
    ])
    arr = fill_edge_empty_with_mean(array, empty_value, min_neighbors)
    # Show the grid after filling.
    print(arr)