From 955bfc818c1de112e20424b75798f865022554f3 Mon Sep 17 00:00:00 2001
From: "Juhil Dungrani (dungranij)"
Date: Mon, 5 Jul 2021 16:33:56 +0100
Subject: [PATCH] test data

---
 CreateTestData_Text.m | 51 ++++++++++++++++++++++++++++++++++++
 TestSolutions.m       | 57 ++++++++++++++++++++++++++++++++++++++++
 TestText.m            | 47 +++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+)
 create mode 100644 CreateTestData_Text.m
 create mode 100644 TestSolutions.m
 create mode 100644 TestText.m

diff --git a/CreateTestData_Text.m b/CreateTestData_Text.m
new file mode 100644
index 0000000..475311a
--- /dev/null
+++ b/CreateTestData_Text.m
@@ -0,0 +1,51 @@
+%% Builds a NetCDF test file in which one model variable is stored as text
+% Copies lat, lon, hour and the first NumModels variables of FileIn into
+% FileOut. The variable picked by Model2Change is created with DataType
+% 'char' and written as char() of its values; all others stay 'single'.
+clear all
+close all
+
+% fullfile inserts the file separator of the platform the script runs on
+FileIn = fullfile('.', 'Model', 'o3_surface_20180701000000.nc');
+C = ncinfo(FileIn);
+VarNames = {C.Variables.Name};
+
+%% Move to new *.nc file
+% NOTE(review): TestText.m opens '../Model/TestFileText.nc', not this name -
+% confirm which file the text check is supposed to load.
+FileOut = 'TestyTest.nc';
+
+% nccreate fails for a variable that already exists, so remove the output
+% of an earlier run before rebuilding it
+if exist(FileOut, 'file') == 2
+    delete(FileOut);
+end
+
+nccreate(FileOut, 'lat', 'Dimensions', {'lat', 400}, 'DataType', 'single');
+ncwrite(FileOut, 'lat', ncread(FileIn, 'lat'));
+nccreate(FileOut, 'lon', 'Dimensions', {'lon', 700}, 'DataType', 'single');
+ncwrite(FileOut, 'lon', ncread(FileIn, 'lon'));
+nccreate(FileOut, 'hour', 'Dimensions', {'hour', 25}, 'DataType', 'single');
+ncwrite(FileOut, 'hour', ncread(FileIn, 'hour'));
+
+Model2Change = 6; % Select the model that will be overwritten with errors
+NumModels = 7; % NOTE(review): TestSolutions.m reads 8 models - confirm the count
+
+for idx = 1:NumModels
+    % the good and the bad model differ only in the storage type
+    if idx == Model2Change
+        VarType = 'char';
+    else
+        VarType = 'single';
+    end
+
+    Var = ncread(FileIn, VarNames{idx});
+    nccreate(FileOut, VarNames{idx},...
+        'Dimensions', {'lon', 700, 'lat', 400, 'hour', 25},...
+        'DataType', VarType);
+
+    if idx == Model2Change
+        Var = char(Var); % turn the numeric values into characters
+    end
+    ncwrite(FileOut, VarNames{idx}, Var);
+end
diff --git a/TestSolutions.m b/TestSolutions.m
new file mode 100644
index 0000000..b6192d0
--- /dev/null
+++ b/TestSolutions.m
@@ -0,0 +1,57 @@
+%% Script to examine NetCDF data formats and check for NaN
+% Note, you would carry out this test each time you load data.
+% You should NOT test the whole file at the start
+
+clear all
+close all
+
+
+%% Test File with Errors
+NaNErrors = 0; % set to 1 as soon as any hour contains a NaN
+%% Set file to test
+% NOTE(review): absolute path on one user's machine - confirm before merging.
+FileName = '/Users/juhildungrani/Desktop/Sem 4/Big Data/work/5011CEM2021-dungranij/2a Data Exploration/o3_surface_20180701000000.nc'; % define our test file
+% FileName = '../Model/o3_surface_20180701000000.nc'; % un rem this line to see what happens with good data
+Contents = ncinfo(FileName); % Store the file content information in a variable.
+
+% ncread start indices. CreateTestData_Text.m declares the model variables
+% as lon x lat x hour; presumably this file uses the same order - confirm.
+StartLon = 1;
+StartLat = 1;
+
+fprintf('Testing files: %s\n', FileName)
+for idxHour = 1:25
+
+    % load one hour of every model; the first dimension of Data is the model
+    for idxModel = 1:8
+        Data(idxModel,:,:) = ncread(FileName, Contents.Variables(idxModel).Name,...
+            [StartLon, StartLat, idxHour], [inf, inf, 1]);
+    end
+
+    % check for NaNs: flatten each model to one row and flag rows with a NaN
+    ModelHasNaN = any(isnan(Data(:,:)), 2);
+    if any(ModelHasNaN)
+        NaNErrors = 1;
+        % display warning
+        fprintf('NaNs present\n')
+        % find first error: a plain find(isnan(Data), 1) would give a linear
+        % index into the whole array, which is not a valid model number
+        ErrorModel = find(ModelHasNaN, 1, 'first');
+
+        fprintf('Analysis for hour %i is invalid, NaN errors recorded in model %s\n',...
+            idxHour, Contents.Variables(ErrorModel).Name)
+
+        % Analysis will crash if we continue!
+        % Set all data to zero, so analysis failure is obvious.
+        Data = zeros(size(Data));
+        % continue and carry out analysis?
+        % Other techniques may be better, e.g. set all to (-1), or skip
+        % analysis and set results which will be quicker, e.g:
+        % Results = -(ones(700, 400));
+    end
+
+end
+
+if ~NaNErrors
+    fprintf('No errors!\n')
+end
diff --git a/TestText.m b/TestText.m
new file mode 100644
index 0000000..4cf50e8
--- /dev/null
+++ b/TestText.m
@@ -0,0 +1,47 @@
+%% Script to examine NetCDF data formats and check for non-numeric values (chars only)
+% For every file in FileNames the storage type of each variable is read and
+% the script reports whether any variable is stored as text (NC_Char).
+
+clear all
+close all
+
+%% Define plain text variable types
+% netcdf.inqVar reports a type as a number; entry k of this list names type
+% number k. Only six names are listed, so a larger type number cannot be
+% looked up here.
+DataTypes = {'NC_Byte', 'NC_Char', 'NC_Short', 'NC_Int', 'NC_Float', 'NC_Double'};
+
+%% Set files to test: a good file first, then the file with errors
+FileNames = {'../Model/o3_surface_20180701000000.nc', ...
+             '../Model/TestFileText.nc'};
+
+for idxFile = 1:numel(FileNames)
+    FileName = FileNames{idxFile}; % define our test file
+
+    Contents = ncinfo(FileName); % Store the file content information in a variable.
+    NumVars = size(Contents.Variables, 2);
+    FileID = netcdf.open(FileName,'NC_NOWRITE'); % open file read only and create handle
+
+    % Start from a fresh vector for each file, otherwise type numbers left
+    % over from a file with more variables would be reported again.
+    datatype = zeros(1, NumVars);
+    for idx = 0:NumVars-1 % loop through each variable (IDs start at 0)
+        % read data type for each variable and store
+        [~, datatype(idx+1)] = netcdf.inqVar(FileID,idx);
+    end
+    netcdf.close(FileID); % release the handle once the types are read
+
+    % display data types (no semicolon, so the list is echoed)
+    DataInFile = DataTypes(datatype)'
+
+    % find character data types
+    FindText = strcmp('NC_Char', DataInFile);
+
+    % print results
+    fprintf('Testing file: %s\n', FileName)
+    if any(FindText)
+        fprintf('Error, text variables present:\n')
+    else
+        fprintf('All data is numeric, continue analysis.\n')
+    end
+end