MATLAB读取Excel中的文本型数字

关键代码内容:

% Read the text-formatted numeric column G2:G1496 of sheet 'sheet1' from
% every .xlsx workbook in the data folder and collect the values in PM_All,
% one column per workbook. Cells that contain the placeholder '—' become 0.
clc;
clear;
Path = fullfile('D:\Data\20210128PM2.5');
File = dir(fullfile(Path,'*.xlsx'));
FileNames = {File.name};
Length_Names = numel(FileNames);
PM_All = [];    % allocated once the first workbook tells us the row count
for k = 1:Length_Names
    % Brace indexing yields a char vector, so K_Trace is a plain char path
    % and fullfile supplies the platform's separator.
    K_Trace = fullfile(Path,FileNames{k});
    [~,~,raw] = xlsread(K_Trace,'sheet1','G2:G1496');
    raw = raw(:);
    % Replace the placeholder directly; no variable named "nan" is created,
    % so the built-in nan/NaN stays usable.
    raw(strcmp(raw,'—')) = {'0'};
    % Text cells are parsed with str2double (no eval, NaN on unparsable
    % text); cells that xlsread already returned as numbers are kept as-is.
    isText = cellfun(@ischar,raw);
    pmdata = NaN(numel(raw),1);
    pmdata(isText)  = cellfun(@str2double,raw(isText));
    pmdata(~isText) = cellfun(@double,raw(~isText));
    if isempty(PM_All)
        PM_All = zeros(numel(pmdata),Length_Names);
    end
    PM_All(:,k) = pmdata;
    fprintf('已读取第%d个文件\n',k)
end
% Use the file count rather than the loop variable: k is empty when the
% folder contains no .xlsx files.
fprintf('共读取%d个文件\n',Length_Names)

代码解释:

  1. 读取文件夹下所有后缀为xlsx的文件
% Collect the names of all .xlsx workbooks found in the data folder.
Path = fullfile('D:\Data\20210128PM2.5');    % folder that is searched
File = dir(fullfile(Path, '*.xlsx'));        % one struct entry per matching file
FileNames = {File.name};                     % file names gathered into a cell row
Length_Names = numel(FileNames);             % number of workbooks found
  2. 在Excel中以文本格式保存的数值,用xlsread读取时无法从num中得到,需要从第三个输出raw(cell数组)中获取。
  3. 用strcmp函数找出raw中内容为“—”的元素,并把这些元素替换为文本'0'。
  4. 再把raw中的每个元素逐个由文本转换为数值(例如用cellfun对每个元素调用str2num),最终得到double类型的数组。
% K_Trace is the workbook path built by the surrounding loop; char() accepts
% it whether it is a char vector or a one-element cell.
[~,~,raw] = xlsread(char(K_Trace),'sheet1','G2:G1496');
raw = raw(:);
% Replace the placeholder '—' with the text '0' directly, without creating
% a variable that would shadow the built-in nan.
raw(strcmp(raw,'—')) = {'0'};
% Parse text cells with str2double (no eval, NaN on unparsable text) and
% keep cells that xlsread already returned as numbers.
isText = cellfun(@ischar,raw);
pmdata = NaN(numel(raw),1);
pmdata(isText)  = cellfun(@str2double,raw(isText));
pmdata(~isText) = cellfun(@double,raw(~isText));

用Excel公式对非零元素求均值(前面已把“—”替换为0,所以求均值时要把0排除在外):

=AVERAGEIF(F2:EPZ2,"<>0")

插曲:
尝试分块处理

% Process the workbooks in up to 10 chunks: for each chunk, read column
% G2:G1496 of sheet 'sheet1' from its workbooks into PM_All (one column per
% workbook, '—' placeholders become 0) and write the chunk to
% output/output<chunk>.xlsx.
clc;
clear;
Path = fullfile('D:/Data/20210128PM2.5');
Out_Dir = fullfile(Path,'output');
if ~exist(Out_Dir,'dir')
    mkdir(Out_Dir);     % xlswrite cannot create a missing folder
end
File = dir(fullfile(Path,'*.xlsx'));
FileNames = {File.name};
Length_Names = numel(FileNames);
Chunk_Count = 10;
Length_Names_chunk = ceil(Length_Names/Chunk_Count);
for chunk = 1:Chunk_Count
    First_k = Length_Names_chunk*(chunk-1)+1;
    % Clamp the upper bound for every chunk, not only the last one: with
    % few files (e.g. 15) an unclamped bound runs past the end of FileNames.
    Last_k = min(Length_Names_chunk*chunk,Length_Names);
    if First_k > Last_k
        % Nothing left for this chunk; skip it instead of writing an
        % undefined PM_All.
        fprintf('第%d个分块没有文件,已跳过\n',chunk)
        continue
    end
    PM_All = [];    % allocated once the first workbook tells us the row count
    col = 1;
    for k = First_k:Last_k
        K_Trace = fullfile(Path,FileNames{k});
        [~,~,raw] = xlsread(K_Trace,'sheet1','G2:G1496');
        raw = raw(:);
        % Replace the placeholder directly; no variable named "nan" is
        % created, so the built-in nan/NaN stays usable.
        raw(strcmp(raw,'—')) = {'0'};
        % Text cells are parsed with str2double (no eval, NaN on unparsable
        % text); cells already returned as numbers are kept as-is.
        isText = cellfun(@ischar,raw);
        pmdata = NaN(numel(raw),1);
        pmdata(isText)  = cellfun(@str2double,raw(isText));
        pmdata(~isText) = cellfun(@double,raw(~isText));
        if isempty(PM_All)
            PM_All = zeros(numel(pmdata),Last_k-First_k+1);
        end
        PM_All(:,col) = pmdata;
        col = col + 1;
        fprintf('已读取第%d个文件\n',k)
    end
    NewFile_Path = fullfile(Out_Dir,sprintf('output%d.xlsx',chunk));
    xlswrite(NewFile_Path,PM_All)
    fprintf('第%d个分块读取结束\n',chunk)
end

为避免个别文件读取出错导致整个程序中断,用try…catch处理,并把出错文件的路径记录到error_log.txt中:

% Read column G2:G1496 of sheet 'sheet1' from every .xlsx workbook in the
% data folder into PM_All (one column per workbook, '—' placeholders become
% 0). A workbook that fails to read is logged to output/error_log.txt and
% skipped; its column in PM_All stays zero. The result is written to
% output/output.xlsx.
clc;
clear;
Path = fullfile('D:\Data\20210128PM2.5');
Out_Dir = fullfile(Path,'output');
if ~exist(Out_Dir,'dir')
    mkdir(Out_Dir);     % needed by both the error log and xlswrite
end
File = dir(fullfile(Path,'*.xlsx'));
FileNames = {File.name};
Length_Names = numel(FileNames);
PM_All = [];    % allocated once the first readable workbook gives the row count
error_num = 0;
for k = 1:Length_Names
    % Built outside the try block, as a char vector, so the catch block can
    % always print it with %s (a cell argument makes fprintf throw).
    K_Trace = fullfile(Path,FileNames{k});
    try
        [~,~,raw] = xlsread(K_Trace,'sheet1','G2:G1496');
        raw = raw(:);
        % Replace the placeholder directly; no variable named "nan" is
        % created, so the built-in nan/NaN stays usable.
        raw(strcmp(raw,'—')) = {'0'};
        % Text cells are parsed with str2double (no eval, NaN on unparsable
        % text); cells already returned as numbers are kept as-is.
        isText = cellfun(@ischar,raw);
        pmdata = NaN(numel(raw),1);
        pmdata(isText)  = cellfun(@str2double,raw(isText));
        pmdata(~isText) = cellfun(@double,raw(~isText));
        if isempty(PM_All)
            PM_All = zeros(numel(pmdata),Length_Names);
        end
        PM_All(:,k) = pmdata;
        fprintf('已读取第%d个文件\n',k)
    catch ME
        fprintf('第%d个文件有问题!!!!!\n',k)
        fprintf('    %s\n',ME.message)     % show why the workbook failed
        error_text = fopen(fullfile(Out_Dir,'error_log.txt'),'a');
        if error_text ~= -1                 % fopen returns -1 on failure
            fprintf(error_text,'%s\n',K_Trace);
            fclose(error_text);
        end
        error_num = error_num+1;
    end
end
% Report only the workbooks that were actually read.
fprintf('共读取%d个文件\n',Length_Names-error_num)
fprintf('共%d个文件读取有问题\n',error_num)
NewFile_Path = fullfile(Out_Dir,'output.xlsx');
if isempty(PM_All)
    % No workbook could be read, so there is nothing to write.
    fprintf('没有可写入的数据\n')
else
    xlswrite(NewFile_Path,PM_All)
end

你可能感兴趣的:(matlab,数据分析)