diff --git a/2a Data Exploration/ExploreData.m b/2a Data Exploration/ExploreData.m
new file mode 100644
index 0000000..2d8fe80
--- /dev/null
+++ b/2a Data Exploration/ExploreData.m
@@ -0,0 +1,52 @@
+%% This script allows you to open and explore the data in a *.nc file
+clear all
+close all
+
+FileName = fullfile('..', 'Model', 'o3_surface_20180701000000.nc'); % fullfile picks the right separator for the platform
+
+Contents = ncinfo(FileName);
+
+%% List dimension names
+% If you select dimension 1, then you are selecting along the 'longitude'.
+% For our project we will select along the 'ntim' or 'time' dimension.
+fprintf('Data Dimension Names: %s, %s, %s\n',...
+    Contents.Dimensions(1).Name,...
+    Contents.Dimensions(2).Name,...
+    Contents.Dimensions(3).Name)
+
+%% List variable names
+% Note that variable 3 is an ensemble, we will NOT use this in our project!
+% Note each model is 700 x 400 x 25 and we know that lon, lat and time match
+% these numbers.
+% To visualise this 3D array think of it as a 700 x 400 grid for each model
+% for one hour. These are stacked up 25 high.
+
+NumVariables = size(Contents.Variables,2);
+fprintf('Variable names and sizes:\n')
+for idx = 1: NumVariables
+    % Build the size text separately so variables with fewer than three dimensions print cleanly
+    SizeText = sprintf('%i, ', Contents.Variables(idx).Size); % the format repeats for every dimension
+    fprintf('%i %s %s\n', idx, Contents.Variables(idx).Name, SizeText(1:end-2)); % drop the trailing ', '
+end
+
+
+%% Selecting data
+% We want to load models only, i.e. variables 1, 2, 4, 5, 6, 7, 8
+% and we only want a single hour. We use indexing into our *.nc file:
+% To load the variable 'chimere_ozone', starting at lat = 1, lon = 1 and
+% hour = 1 we use:
+StartLat = 1;
+NumLat = 400;
+StartLon = 1;
+NumLon = 700;
+StartHour = 1;
+NumHour = 1;
+
+Data = ncread(FileName, 'chimere_ozone', [StartLon, StartLat, StartHour], [NumLon, NumLat, NumHour]);
+
+%% Cycling through the variable names
+% We only want the models to load
+
+for idx = [1, 2, 4, 5, 6, 7, 8]
+    fprintf('Model %i : %s\n', idx, Contents.Variables(idx).Name);
+end
diff --git a/2b Memory/MemorySaving.m b/2b Memory/MemorySaving.m
new file mode 100644
index 0000000..b9c962b
--- /dev/null
+++ b/2b Memory/MemorySaving.m
@@ -0,0 +1,76 @@
+%% This script compares the memory used when loading all the data at once with loading it hour by hour
+clear all % clear all variables
+close all % close all windows
+
+FileName = fullfile('..', 'Model', 'o3_surface_20180701000000.nc'); % name of the file to be used, the path is included
+
+Contents = ncinfo(FileName); % Store the file content information in a variable.
+
+
+%% Section 2: Load all the model data together
+for idx = 1: 8
+    AllData(idx,:,:,:) = ncread(FileName, Contents.Variables(idx).Name);
+    fprintf('Loading %s\n', Contents.Variables(idx).Name); % display loading information
+end
+
+AllDataMem = whos('AllData').bytes/1000000;
+fprintf('Memory used for all data: %.3f MB\n', AllDataMem)
+
+%% Section 3: Loading all the data for a single hour from all the models
+% We combine the above code to cycle through the names and load each model.
+% We load the data into successive 'layers' using 'idx', and let the other +% two dimensions take care of themselves by using ':' +StartLat = 1; % starting latitude +NumLat = 400; % number of latitude positions +StartLon = 1; % starying longitude +NumLon = 700; % number of lingitude positions +StartHour = 1; % starting time for analyises +NumHour = 1; % Number of hours of data to load + +% loop through the models loading *ALL* the data into an array +Models2Load = [1, 2, 4, 5, 6, 7, 8]; % list of models to load +idxModel = 0; % current model +for idx = 1:7 + idxModel = idxModel + 1; % move to next model index + LoadModel = Models2Load(idx); % which model to load + ModelData(idxModel,:,:,:) = ncread(FileName, Contents.Variables(LoadModel).Name,... + [StartLon, StartLat, StartHour], [NumLon, NumLat, NumHour]); + fprintf('Loading %s\n', Contents.Variables(LoadModel).Name); % display loading information +end + +HourDataMem = whos('ModelData').bytes/1000000; +fprintf('Memory used for 1 hour of data: %.3f MB\n', HourDataMem) + +%% Section 4: Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'StartHour' in our loop +HourMem = 0; % storage variable for the maximum memory in use by our data variable +StartLat = 1; % starting latitude +NumLat = 400; % number of latitude positions +StartLon = 1; % starying longitude +NumLon = 700; % number of lingitude positions +% StartHour = 1; % starting time for analyises +NumHour = 1; % Number of hours of data to load + +% loop through the hours loading one at a time +for StartHour = 1:25 + Models2Load = [1, 2, 4, 5, 6, 7, 8]; % list of models to load + idxModel = 0; % current model + for idx = 1:7 + idxModel = idxModel + 1; % move to next model index + LoadModel = Models2Load(idx);% which model to load + HourlyData(idxModel,:,:,:) = ncread(FileName, Contents.Variables(LoadModel).Name,... 
+ [StartLon, StartLat, StartHour], [NumLon, NumLat, NumHour]); + fprintf('Loading %s\n', Contents.Variables(LoadModel).Name); % display loading information + end + + % Record the maximum memory used by the data variable so far + HourMem = max( [ HourMem, whos('HourlyData').bytes/1000000 ] ); + fprintf('Loaded Hour %i, memory used: %.3f MB\n', StartHour, HourMem); % display loading information +end + +%% Section 5: Print our results +fprintf('\nResults:\n') +fprintf('Memory used for all data: %.2f MB\n', AllDataMem) +fprintf('Memory used for hourly data: %.2f MB\n', HourDataMem) +fprintf('Maximum memory used hourly = %.2f MB\n', HourMem) +fprintf('Hourly memory as fraction of all data = %.2f\n\n', HourMem / AllDataMem) \ No newline at end of file diff --git a/3a Sequential/SequentialProcessing.m b/3a Sequential/SequentialProcessing.m new file mode 100644 index 0000000..a936c92 --- /dev/null +++ b/3a Sequential/SequentialProcessing.m @@ -0,0 +1,69 @@ +%% This script allows you to open and explore the data in a *.nc file +clear all +close all + +FileName = '..\Model\o3_surface_20180701000000.nc'; + +Contents = ncinfo(FileName); + +Lat = ncread(FileName, 'lat'); % load the latitude locations +Lon = ncread(FileName, 'lon'); % loadthe longitude locations + +%% Processing parameters provided by customer +RadLat = 30.2016; % cluster radius value for latitude +RadLon = 24.8032; % cluster radius value for longitude +RadO3 = 4.2653986e-08; % cluster radius value for the ozone data + +%% Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'sequential processing' will process the data location one +% after the other, reporting on the time involved. 
+ +StartLat = 1; % latitude location to start laoding +NumLat = 400; % number of latitude locations ot load +StartLon = 1; % longitude location to start loading +NumLon = 700; % number of longitude locations ot load +tic +for NumHour = 1:25 % loop through each hour + fprintf('Processing hour %i\n', NumHour) + DataLayer = 1; % which 'layer' of the array to load the model data into + for idx = [1, 2, 4, 5, 6, 7, 8] % model data to load + % load the model data + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, NumHour], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; % step to the next 'layer' + end + + % We need to prepare our data for processing. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + %% Sequential analysis + t1 = toc; + t2 = t1; + for idx = 1: size(Data2Process,1) % step through each data location to process the data + + % The analysis of the data creates an 'ensemble value' for each + % location. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [EnsembleVector(idx, NumHour)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + + % To monitor the progress we will print out the status after every + % 50 processes. + if idx/50 == ceil( idx/50) + tt = toc-t2; + fprintf('Total %i of %i, last 50 in %.2f s predicted time for all data %.1f s\n',... 
+ idx, size(Data2Process,1), tt, size(Data2Process,1)/50*25*tt) + t2 = toc; + end + end + T2(NumHour) = toc - t1; % record the total processing time for this hour + fprintf('Processing hour %i - %.2f s\n\n', NumHour, sum(T2)); + + +end +tSeq = toc; + +fprintf('Total time for sequential processing = %.2f s\n\n', tSeq) \ No newline at end of file diff --git a/3b Parallel/ParallelProcessing.m b/3b Parallel/ParallelProcessing.m new file mode 100644 index 0000000..65ddae6 --- /dev/null +++ b/3b Parallel/ParallelProcessing.m @@ -0,0 +1,125 @@ +function ParallelProcessing +%% 1: Load Data +clear all +close all + +FileName = '..\Model\o3_surface_20180701000000.nc'; + +Contents = ncinfo(FileName); + +Lat = ncread(FileName, 'lat'); +Lon = ncread(FileName, 'lon'); +NumHours = 25; + +%% 2: Processing parameters +% ## provided by customer ## +RadLat = 30.2016; +RadLon = 24.8032; +RadO3 = 4.2653986e-08; + +StartLat = 1; +NumLat = 400; +StartLon = 1; +NumLon = 700; + +%% 3: Pre-allocate output array memory +% the '-4' value is due to the analysis method resulting in fewer output +% values than the input array. +NumLocations = (NumLon - 4) * (NumLat - 4); +EnsembleVectorPar = zeros(NumLocations, NumHours); % pre-allocate memory + +%% 4: Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'parallel processing' will process the data location one +% after the other, reporting on the time involved. +tic +for idxTime = 1:NumHours + + %% 5: Load the data for each hour + % Each hour we read the data from the required models, defined by the + % index variable. Each model data are placed on a 'layer' of the 3D + % array resulting in a 7 x 700 x 400 array. + % We do this by indexing through the model names, then defining the + % start position as the beginnning of the Lat, beginning of the Lon and + % beginning of the new hour. 
We then define the number of elements + % along each data dimension, so the total number of Lat, the total + % number of Lon, but only 1 hour. + % You can use these values to select a smaller sub-set of the data if + % required to speed up testing o fthe functionality. + + DataLayer = 1; + for idx = [1, 2, 4, 5, 6, 7, 8] + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, idxTime], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; + end + + %% 6: Pre-process the data for parallel processing + % This takes the 3D array of data [model, lat, lon] and generates the + % data required to be processed at each location. + % ## This process is defined by the customer ## + % If you want to know the details, please ask, but this is not required + % for the module or assessment. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + +%% Parallel Analysis + %% 7: Create the parallel pool and attache files for use + PoolSize = 2 ; % define the number of processors to use in parallel + if isempty(gcp('nocreate')) + parpool('local',PoolSize); + end + poolobj = gcp; + % attaching a file allows it to be available at each processor without + % passing the file each time. This speeds up the process. For more + % information, ask your tutor. + addAttachedFiles(poolobj,{'EnsembleValue'}); + +% %% 8: Parallel processing is difficult to monitor progress so we define a +% % special function to create a wait bar which is updated after each +% % process completes an analysis. The update function is defined at the +% % end of this script. Each time a parallel process competes it runs the +% % function to update the waitbar. + DataQ = parallel.pool.DataQueue; % Create a variable in the parallel pool +% +% % Create a waitbar and handle top it: + hWaitBar = waitbar(0, sprintf('Time period %i, Please wait ...', idxTime)); +% % Define the function to call when new data is received in the data queue +% % 'DataQ'. 
See end of script for the function definition. + afterEach(DataQ, @nUpdateWaitbar); + N = size(Data2Process,1); % the total number of data to process + p = 20; % offset so the waitbar shows some colour quickly. + + %% 9: The actual parallel processing! + % Ensemble value is a function defined by the customer to calculate the + % ensemble value at each location. Understanding this function is not + % required for the module or the assessment, but it is the reason for + % this being a 'big data' project due to the processing time (not the + % pure volume of raw data alone). + T4 = toc; + parfor idx = 1: 100 % size(Data2Process,1) + [EnsembleVectorPar(idx, idxTime)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + send(DataQ, idx); + end + + close(hWaitBar); % close the wait bar + + T3(idxTime) = toc - T4; % record the parallel processing time for this hour of data + fprintf('Parallel processing time for hour %i : %.1f s\n', idxTime, T3(idxTime)) + +end % end time loop +T2 = toc; +delete(gcp); + +%% 10: Reshape ensemble values to Lat, lon, hour format +EnsembleVectorPar = reshape(EnsembleVectorPar, 696, 396, []); +fprintf('Total processing time for %i workers = %.2f s\n', PoolSize, sum(T3)); + +%% 11: ### PROCESSING COMPLETE DATA NEEDS TO BE SAVED ### + +function nUpdateWaitbar(~) % nested function + waitbar(p/N, hWaitBar, sprintf('Hour %i, %.3f complete, %i out of %i', idxTime, p/N*100, p, N)); + p = p + 1; +end + +end % end function \ No newline at end of file diff --git a/3c Plotting/Graphs.m b/3c Plotting/Graphs.m new file mode 100644 index 0000000..93c14fc --- /dev/null +++ b/3c Plotting/Graphs.m @@ -0,0 +1,43 @@ +%% Plotting graphs in Matlab +clear all +close all + + +%% Show two plots on different y-axes +%% 250 data processed +x1Vals = [2, 3, 4, 5, 6, 7]; +y1Vals = [65, 56, 47, 44, 40, 39]; +figure(1) +yyaxis left +plot(x1Vals, y1Vals, '-bd') +xlabel('Number of Processors') +ylabel('Processing time (s)') +title('Processing time vs 
number of processors')
+
+
+%% 5,000 data processed
+x2Vals = [2, 3, 4, 5, 6, 7, 8];
+y2Vals = [1560, 1077, 945, 838, 852, 725, 707];
+figure(1)
+yyaxis right
+plot(x2Vals, y2Vals, '-rx')
+xlabel('Number of Processors')
+ylabel('Processing time (s)')
+title('Processing time vs number of processors')
+
+legend('250 Data', '5,000 Data')
+
+
+%% Show two plots on same y-axis
+%% Mean processing time
+y1MeanVals = y1Vals / 250;
+y2MeanVals = y2Vals / 5000;
+
+figure(2)
+plot(x1Vals, y1MeanVals, '-bd')
+hold on
+plot(x2Vals, y2MeanVals, '-rx')
+xlabel('Number of Processors')
+ylabel('Processing time (s)')
+title('Mean Processing time vs number of processors')
+legend('250 Data', '5,000 Data')
\ No newline at end of file
diff --git a/6a Testing Text/CreateTestData_Text.m b/6a Testing Text/CreateTestData_Text.m
new file mode 100644
index 0000000..475311a
--- /dev/null
+++ b/6a Testing Text/CreateTestData_Text.m
@@ -0,0 +1,39 @@
+%% Creates a test *.nc file in which one model is stored as text (char) data
+clear all
+close all
+
+FileIn = fullfile('.', 'Model', 'o3_surface_20180701000000.nc');
+C = ncinfo(FileIn);
+VarNames = {C.Variables.Name};
+
+%% Move to new *.nc file
+% TestText.m reads 'TestFileText.nc' from the Model folder, so write it there.
+FileOut = fullfile('.', 'Model', 'TestFileText.nc');
+if exist(FileOut, 'file')
+    delete(FileOut); % nccreate cannot re-create variables that already exist
+end
+nccreate(FileOut, 'lat', 'Dimensions', {'lat', 400}, 'DataType', 'single');
+ncwrite(FileOut, 'lat', ncread(FileIn, 'lat'));
+nccreate(FileOut, 'lon', 'Dimensions', {'lon', 700}, 'DataType', 'single');
+ncwrite(FileOut, 'lon', ncread(FileIn, 'lon'));
+nccreate(FileOut, 'hour', 'Dimensions', {'hour', 25}, 'DataType', 'single');
+ncwrite(FileOut, 'hour', ncread(FileIn, 'hour'));
+
+Model2Change = 6; % Select the model that will be overwritten with errors
+
+for idx = 1:7
+    if idx == Model2Change % this model is stored as text, the others stay numeric
+        VarType = 'char';
+    else
+        VarType = 'single';
+    end
+    Var = ncread(FileIn, VarNames{idx});
+    nccreate(FileOut, VarNames{idx},...
+        'Dimensions', { 'lon', 700, 'lat', 400, 'hour', 25},...
+        'DataType', VarType);
+    if idx == Model2Change
+        Var = char(Var); % convert the numeric values to characters
+    end
+    ncwrite(FileOut, VarNames{idx}, Var);
+end
+
diff --git a/6a Testing Text/TestText.m b/6a Testing Text/TestText.m
new file mode 100644
index 0000000..ae44193
--- /dev/null
+++ b/6a Testing Text/TestText.m
@@ -0,0 +1,62 @@
+%% Script to examine NetCDF data formats and check for non-numeric values (chars only)
+
+clear all
+close all
+
+%% Define plain text variable types
+DataTypes = {'NC_Byte', 'NC_Char', 'NC_Short', 'NC_Int', 'NC_Float', 'NC_Double'};
+
+%% Test a good file
+%% Set file to test
+FileName = '../Model/o3_surface_20180701000000.nc'; % define our test file
+
+Contents = ncinfo(FileName); % Store the file content information in a variable.
+FileID = netcdf.open(FileName,'NC_NOWRITE'); % open file read only and create handle
+
+for idx = 0:size(Contents.Variables,2)-1 % loop through each variable
+    % read data type for each variable and store
+    [~, datatype(idx+1), ~, ~] = netcdf.inqVar(FileID,idx);
+end
+netcdf.close(FileID); % release the file handle now that the types are known
+%% display data types
+DataInFile = DataTypes(datatype)'
+
+%% find character data types
+FindText = strcmp('NC_Char', DataInFile);
+
+%% print results
+fprintf('Testing file: %s\n', FileName)
+if any(FindText)
+    fprintf('Error, text variables present:\n')
+else
+    fprintf('All data is numeric, continue analysis.\n')
+end
+
+%% #####
+
+%% Test File with Errors
+%% Set file to test
+    FileName = '../Model/TestFileText.nc'; % define our test file
+
+    Contents = ncinfo(FileName); % Store the file content information in a variable.
+ FileID = netcdf.open(FileName,'NC_NOWRITE'); % open file read only and create handle + + for idx = 0:size(Contents.Variables,2)-1 % loop through each variable + % read data type for each variable and store + [~, datatype(idx+1), ~, ~] = netcdf.inqVar(FileID,idx); + end + + %% display data types + DataInFile = DataTypes(datatype)' + + %% find character data types + FindText = strcmp('NC_Char', DataInFile); + + %% print results + fprintf('Testing file: %s\n', FileName) + if any(FindText) + fprintf('Error, text variables present:\n') + else + fprintf('All data is numeric, continue analysis.\n') + end + diff --git a/6b Testing NaN/CreateTestData_NaN.m b/6b Testing NaN/CreateTestData_NaN.m new file mode 100644 index 0000000..ab754f3 --- /dev/null +++ b/6b Testing NaN/CreateTestData_NaN.m @@ -0,0 +1,20 @@ +%% Replaces one hours worth of data with NaN +clear all +close all + +OriginalFileName = './Model/o3_surface_20180701000000.nc'; +NewFileName = './Model/TestFileNaN.nc'; +copyfile(OriginalFileName, NewFileName); + +C = ncinfo(NewFileName); +ModelNames = {C.Variables(1:8).Name}; + + +%% Change data to NaN +BadData = NaN(700,400,1); + +%% Write to *.nc file +Hour2Replace = 12; +for idx = 1:8 + ncwrite(NewFileName, ModelNames{idx}, BadData, [1, 1, Hour2Replace]); +end diff --git a/6b Testing NaN/TestNan.m b/6b Testing NaN/TestNan.m new file mode 100644 index 0000000..8448c34 --- /dev/null +++ b/6b Testing NaN/TestNan.m @@ -0,0 +1,72 @@ +%% Script to examine NetCDF data formats and check for NaN +% Note, you would carry out this test each time you load data. +% You should NOT test the whole file at the start + +clear all +close all + + +%% Test a good file +NaNErrors = 0; +%% Set file to test +FileName = '../Model/o3_surface_20180701000000.nc'; % define our test file + +Contents = ncinfo(FileName); % Store the file content information in a variable. 
+ +StartLat = 1; +StartLon = 1; + +for idxHour = 1:25 + + for idxModel = 1:8 + Data(idxModel,:,:) = ncread(FileName, Contents.Variables(idxModel).Name,... + [StartLat, StartLon, idxHour], [inf, inf, 1]); + end + + % check for NaNs + if any(isnan(Data), 'All') + fprintf('NaNs present\n') + NaNErrors = 1; + end +end + +fprintf('Testing files: %s\n', FileName) +if NaNErrors + fprintf('NaN errors present!\n') +else + fprintf('No errors!\n') +end + + + + +%% Test File with Errors +NaNErrors = 0; +%% Set file to test +FileName = '../Model/TestFileNaN.nc'; % define our test file + +Contents = ncinfo(FileName); % Store the file content information in a variable. + +StartLat = 1; +StartLon = 1; + +fprintf('Testing files: %s\n', FileName) +for idxHour = 1:25 + + for idxModel = 1:8 + Data(idxModel,:,:) = ncread(FileName, Contents.Variables(idxModel).Name,... + [StartLat, StartLon, idxHour], [inf, inf, 1]); + end + + % check for NaNs + if any(isnan(Data), 'All') + fprintf('NaNs present during hour %i\n', idxHour) + NaNErrors = 1; + end +end + +if NaNErrors + fprintf('NaN errors present!\n') +else + fprintf('No errors!\n') +end \ No newline at end of file diff --git a/Common Files/DDC_ver01_1_CAMS.m b/Common Files/DDC_ver01_1_CAMS.m new file mode 100644 index 0000000..4369583 --- /dev/null +++ b/Common Files/DDC_ver01_1_CAMS.m @@ -0,0 +1,214 @@ +function [ Clusters, Results ] = DDC_ver01_1_CAMS( varargin ) +%DDC_VER01.1 Data Density Based Clustering +% Copyright R Hyde 2017 +% Released under the GNU GPLver3.0 +% You should have received a copy of the GNU General Public License +% along with this program. 
If not, see 0 + % size(DataIn,1) % uncomment to trace remaining data + NumClusters=NumClusters+1; + Clusters.Rad(NumClusters,:)=InitR; + %% Find Cluster Centre + Glob_Mean=mean(DataIn,1); % array of means of data dim + Glob_Scalar=sum(sum((DataIn.*DataIn),2),1)/size(DataIn,1); % array of scalar products for each data dim + % full calculations +% GDensity=1./(1+(pdist2(DataIn,Glob_Mean,'euclidean').^2)+Glob_Scalar-(sum(Glob_Mean.^2))); % calculate global densities +% [~, CentreIndex]=max(GDensity); % find index of max densest point + % slim calculations + GDensity=pdist2(DataIn,Glob_Mean,'euclidean').^2 + Glob_Scalar - sum(Glob_Mean.^2); % calculate global densities + [~, CentreIndex]=min(GDensity); % find index of max densest point + + %% Find points belonging to cluster + Include=bsxfun(@minus,DataIn,DataIn(CentreIndex,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(DataIn(Include,:),DataIn(CentreIndex,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Move cluster centre to local densest point + LocMean=mean(DataIn(Include,:),1); + LocScalar=sum((DataIn(Include,:).^2),2)/size(Include,1); % array of scalar products of data dims + % full calculations +% LocDens=1./(1+(pdist2(DataIn(Include,:),LocMean,'euclidean').^2)+LocScalar-(sum(LocMean.^2))); % calculate local densities +% [~,CentreIndex]=max(LocDens); + % slim calculations + LocDens=pdist2(DataIn(Include,:),LocMean,'euclidean').^2 + LocScalar - sum(LocMean.^2); % calculate local densities + [~,CentreIndex]=min(LocDens); + CentreIndex=Include(CentreIndex); + Clusters.Centre(NumClusters,:)=DataIn(CentreIndex,:); % assign cluster centre + + %% Assign data to new centre + 
Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Update radii to maximum distances + for idx=1:size(DataIn,2) + value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean'); + if max(value01)>0 + Clusters.Rad(NumClusters,idx)=max(value01); + end + end + + %% Assign data to cluster based on new radii + Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Update radii to maximum distances + + for idx=1:size(DataIn,2) + value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean'); + if max(value01)>0 + Clusters.Rad(NumClusters,idx)=max(value01); + else +% Clusters.Rad(NumClusters,idx)=DefaultRadii(idx); + end + end + + %% Plot + if Verbose==1 + hold off;scatter(DataIn(:,1),DataIn(:,2));hold on + scatter(DataIn(CentreIndex,1),DataIn(CentreIndex,2),'r') + scatter(DataIn(Include,1),DataIn(Include,2),'g'); + scatter(Clusters.Centre(NumClusters,1),Clusters.Centre(NumClusters,2),'*','r') + title(sprintf('Clustered: %i, Remaining: %i',size(Results,1)-size(DataIn,1), size(DataIn,1))) + axis([0 1 0 1]) + drawnow + for 
zz=1:size(Clusters.Centre,1) + rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1), Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1), 2*Clusters.Rad(zz,2)],'Curvature',[1,1]) + end + end + %% Assign data to final clusters + StartIdx=find(all(Results==0,2),1,'first'); + EndIdx=StartIdx+size(Include,1)-1; + Results(StartIdx:EndIdx,:)=[DataIn(Include,:),ones(size(Include,1),1)*NumClusters]; + DataIn(Include,:)=[]; % remove clustered data +end + +%% Merge clusters if centre is within another cluster +if Merge==1 +MergeAny=1; + while MergeAny==1 + if Verbose==1 + figure(2) + clf + for zz=1:size(Clusters.Centre,1) + rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1),... + Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1),... + 2*Clusters.Rad(zz,2)],'Curvature',[1,1]) + end + hold on + scatter(Clusters.Centre(:,1),Clusters.Centre(:,2),'*','r') + drawnow + end + + MergeAny=0; + Merges=[]; + % for each cluster & find if cluster centre is within other clusters + for idx1=1:size(Clusters.Centre,1); + InEll=bsxfun(@minus,Clusters.Centre,Clusters.Centre(idx1,1:end)).^2; + InEll=sum(bsxfun(@rdivide,InEll,Clusters.Rad(idx1,:).^2),2); % divide by rad^2 & add + InEll=(InEll<1); + Merges(idx1,:)=InEll.'; + end + Merges(logical(eye(size(Merges))))=0; + % Merge clusters + for idx=1:size(Clusters.Centre,1) + [~,idx1]=find(Merges(idx,:),1); + Results(ismember(Results(:,end),idx1),end)=idx; + if idx1 + MergeAny=1; + end + end + %% renumber clusters + [C,~,ic]=unique(Results(:,end)); + C=1:size(C,1); + Results(:,end)=C(ic); + %% Re-create cluster data + Clusters.Centre=[]; + Clusters.Rad=[]; + for idx1=1:max(Results(:,end)) + Clusters.Centre(idx1,:)=mean(Results(Results(:,3)==idx1,1:end-1),1); + for idx2=1:size(Results,2)-1 + value01=pdist2(Results(Results(:,3)==idx1,idx2),Clusters.Centre(idx1,idx2),'Euclidean'); + if max(value01)>0 + Clusters.Rad(idx1,idx2)=max(value01); + else + Clusters.Rad(idx1,idx2)=0; + end + end + end + + end +end + 
+end % end function \ No newline at end of file diff --git a/Common Files/EnsembleValue.m b/Common Files/EnsembleValue.m new file mode 100644 index 0000000..9fde6df --- /dev/null +++ b/Common Files/EnsembleValue.m @@ -0,0 +1,13 @@ +function EV = EnsembleValue(Data, LatLon, RadLat, RadLon, RadO3) + +%ENSEMBLEVALUE Summary of this function goes here +% Detailed explanation goes here + + +Data4Cluster = [Data(:),LatLon]; +[Clusters, Results] = DDC_ver01_1_CAMS(Data4Cluster, [RadLat, RadLon, RadO3], 0, 0); +MostCommonCluster = mode(Results(:,end)); +EV = Clusters.Centre(MostCommonCluster); + +end + diff --git a/Common Files/Parallel Progress Bar/ParforProgMon.m b/Common Files/Parallel Progress Bar/ParforProgMon.m new file mode 100644 index 0000000..4a0d0f6 --- /dev/null +++ b/Common Files/Parallel Progress Bar/ParforProgMon.m @@ -0,0 +1,157 @@ +% ParforProgMon - CLASS Progress monitor for `parfor` loops +% +% Usage +% Begin by creating a parallel pool. +% +% Then construct a ParforProgMon object: +% ppm = ParforProgMon(strWindowTitle, nNumIterations <, nProgressStepSize, nWidth, nHeight, nMinIterations>); +% +% 'strWindowTitle' is a string containing the title of the progress bar +% window. 'nNumIterations' is an integer with the total number of +% iterations in the loop. +% +% Optional arguments 'nProgressStepSize' specifies how +% many loop iterations should correspond to a single call to 'increment()'. +% 'nWidth' and 'nHeight' specify the size of the progress window. +% +% Within the parfor loop +% parfor (nIndex = 1:nNumIterations) +% if (mod(nIndex, nProgressStepSize) == 0) +% ppm.increment(); +% end +% end +% +% Modified from ParforProgMonv2. 
+ +classdef ParforProgMon < handle + + properties ( GetAccess = private, SetAccess = private ) + Port + HostName + strAttachedFilesFolder + end + + properties (Transient, GetAccess = private, SetAccess = private) + JavaBit + end + + methods ( Static ) + function o = loadobj( X ) + % loadobj - METHOD REconstruct a ParforProgMon object + + % Once we've been loaded, we need to reconstruct ourselves correctly as a + % worker-side object. + % fprintf('Worker: Starting with {%s, %f, %s}\n', X.HostName, X.Port, X.strAttachedFilesFolder); + o = ParforProgMon( {X.HostName, X.Port, X.strAttachedFilesFolder} ); + end + end + + methods + function o = ParforProgMon(strWindowTitle, nNumIterations, nProgressStepSize, nWidth, nHeight, nMinIterations) + % ParforProgMon - CONSTRUCTOR Create a ParforProgMon object + % + % Usage: ppm = ParforProgMon(strWindowTitle, nNumIterations <, nProgressStepSize, nWidth, nHeight>) + % + % 'strWindowTitle' is a string containing the title of the + % progress bar window. 'nNumIterations' is an integer with the + % total number of iterations in the loop. 'nProgressStepSize' + % indicates that one update (call to 'increment') corresponds to + % this many iterations. 'nWidth' indicates the width of the + % progress window. 'nHeight' indicates the width of the progress + % window. + + if (~exist('nMinIterations', 'var') || isempty(nMinIterations)) + nMinIterations = 0; + end + + % - Are we a worker or a server? 
+ if ((nargin == 1) && iscell(strWindowTitle)) + % - Worker constructor + % Get attached files + o.strAttachedFilesFolder = getAttachedFilesFolder(strWindowTitle{3}); + % fprintf('Worker: Attached files folder on worker is [%s]\n', o.strAttachedFilesFolder); + + % Add to java path + w = warning('off', 'MATLAB:Java:DuplicateClass'); + javaaddpath(o.strAttachedFilesFolder); + warning(w); + + % "Private" constructor used on the workers + o.JavaBit = ParforProgressMonitor.createWorker(strWindowTitle{1}, strWindowTitle{2}); + o.Port = []; + + elseif (nargin > 1) && (nNumIterations >= nMinIterations) + % - Server constructor + % Check arguments + if (~exist('nProgressStepSize', 'var') || isempty(nProgressStepSize)) + nProgressStepSize = 1; + end + + if (~exist('nWidth', 'var') || ~exist('nHeight', 'var') || isempty(nHeight) || isempty(nWidth)) + nWidth = 400; + nHeight = 80; + end + + % Check for an existing pool + pPool = gcp('nocreate'); + if (isempty(pPool)) + error('ParforProgMon:NeedPool', ... 
+ '*** ParforProgMon: You must construct a pool before creating a ParforProgMon object.'); + end + + % Amend java path + strPath = fileparts(which('ParforProgMon')); + o.strAttachedFilesFolder = fullfile(strPath, 'java'); + % fprintf('Server: JAVA class folder is [%s]\n', o.strAttachedFilesFolder); + w = warning('off', 'MATLAB:Java:DuplicateClass'); + javaaddpath(o.strAttachedFilesFolder); + warning(w); + + % Distribute class to pool + if (ismember(pPool.AttachedFiles, o.strAttachedFilesFolder)) + pPool.updateAttachedFiles(); + else + pPool.addAttachedFiles(o.strAttachedFilesFolder); + end + + % Normal construction + o.JavaBit = ParforProgressMonitor.createServer( strWindowTitle, nNumIterations, nProgressStepSize, nWidth, nHeight ); + o.Port = double( o.JavaBit.getPort() ); + % Get the client host name from pctconfig + cfg = pctconfig; + o.HostName = cfg.hostname; + end + end + + function X = saveobj( o ) + % saveobj - METHOD Save a ParforProgMon object for serialisations + + % Only keep the Port, HostName and strAttachedFilesFolder + X.Port = o.Port; + X.HostName = o.HostName; + X.strAttachedFilesFolder = o.strAttachedFilesFolder; + end + + function increment( o ) + % increment - METHOD Indicate that a single loop execution has finished + + % Update the UI + if (~isempty(o.JavaBit)) + o.JavaBit.increment(); + end + end + + function delete( o ) + % delete - METHOD Delete a ParforProgMon object + + % - Make sure that any other threads that may have closed + % the UI down have a chance to do it first + pause(.01); + + % Close the UI + if (~isempty(o.JavaBit)) + o.JavaBit.done(); + end + end + end +end diff --git a/Common Files/Parallel Progress Bar/Readme.md b/Common Files/Parallel Progress Bar/Readme.md new file mode 100644 index 0000000..4c37ee0 --- /dev/null +++ b/Common Files/Parallel Progress Bar/Readme.md @@ -0,0 +1,31 @@ +# Parfor progress monitor + + +## A Java-based `Matlab` class for progress monitoring during a `parfor` loop + +### Usage +Begin by creating 
a parallel pool. + +Then construct a ParforProgMon object: + + ppm = ParforProgMon(strWindowTitle, nNumIterations <, nProgressStepSize, nWidth, nHeight>); + + `strWindowTitle` is a string containing the title of the progress bar + window. `nNumIterations` is an integer with the total number of + iterations in the loop. + +#### Optional arguments + `nProgressStepSize` sets how many steps pass between updates of the + progress bar. `nWidth` and `nHeight` specify the size of the + progress window. + +Within the `parfor` loop: + + parfor (nIndex = 1:nNumIterations) + ppm.increment(); + end + +### Credits +[Parfor Progress monitor](https://www.mathworks.com/matlabcentral/fileexchange/24594-parfor-progress-monitor) + +[Parfor Progress monitor v2](https://www.mathworks.com/matlabcentral/fileexchange/31673-parfor-progress-monitor-v2) diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$1.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$1.class new file mode 100644 index 0000000..1dc7bbd Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$1.class differ diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer$1.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer$1.class new file mode 100644 index 0000000..cd8d816 Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer$1.class differ diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer.class new file mode 100644 index 0000000..12833f4 Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgServer.class differ diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgThing.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgThing.class
new file mode 100644
index 0000000..3925ae2
Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgThing.class differ
diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgWorker.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgWorker.class
new file mode 100644
index 0000000..2e9bc6d
Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor$ProgWorker.class differ
diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.class b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.class
new file mode 100644
index 0000000..210fe5b
Binary files /dev/null and b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.class differ
diff --git a/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.java b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.java
new file mode 100644
index 0000000..2087af6
--- /dev/null
+++ b/Common Files/Parallel Progress Bar/java/ParforProgressMonitor.java
@@ -0,0 +1,246 @@
+import javax.swing.*;
+import java.io.*;
+import java.net.*;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+// Copyright 2009 The MathWorks, Inc.
+
+/**
+ * Progress reporting built from two cooperating objects: a {@link ProgServer}
+ * that owns the progress window and listens on a TCP port, and
+ * {@link ProgWorker} objects that signal one unit of progress by connecting
+ * to that port and disconnecting again.
+ */
+public class ParforProgressMonitor {
+
+    /**
+     * Create a "server" progress monitor - this runs on the desktop client and
+     * pops up the progress monitor UI.
+     *
+     * @param s                title of the progress window
+     * @param N                maximum value of the progress bar
+     * @param progressStepSize amount the bar advances per reported increment
+     * @param width            window width passed to {@code JFrame.setSize}
+     * @param height           window height passed to {@code JFrame.setSize}
+     * @return the server, with its listener thread already started
+     * @throws IOException if the listening socket cannot be set up
+     */
+    public static ProgServer createServer( String s, int N, int progressStepSize, int width, int height )
+        throws IOException {
+        ProgServer ret = new ProgServer( s, N, progressStepSize, width, height );
+        ret.start();
+        return ret;
+    }
+
+    /**
+     * Create a "worker" progress monitor - runs on the remote lab and sends updates
+     *
+     * @param host host name the worker connects to on every increment
+     * @param port port the worker connects to, as reported by {@code getPort()}
+     * @return a worker bound to the given host and port
+     * @throws IOException declared by the signature; the constructor only stores its arguments
+     */
+    public static ProgWorker createWorker( String host, int port )
+        throws IOException {
+        return new ProgWorker( host, port );
+    }
+
+    /**
+     * Common interface exposed by both objects
+     */
+    public interface ProgThing {
+        /** Report that one unit of progress has been made. */
+        public void increment();
+        /** Signal that progress reporting is finished. */
+        public void done();
+    }
+
+    /**
+     * The worker-side object. Simply connects to the server to indicate that a
+     * quantum of progress has been made. This is a very basic implementation -
+     * a more sophisticated implementation would use a persistent connection,
+     * and a SocketChannel on the client with a thread doing a select loop and
+     * accepting connections etc.
+     */
+    private static class ProgWorker implements ProgThing {
+        private int fPort;
+        private String fHost;
+        private ProgWorker( String host, int port ) {
+            fHost = host;
+            fPort = port;
+        }
+
+        /**
+         * Connect and disconnect immediately to indicate progress
+         */
+        public void increment() {
+            try {
+                Socket s = new Socket( fHost, fPort );
+                s.close();
+            } catch( Exception e ) {
+                // Best effort: a failed report is logged, never propagated to the caller
+                e.printStackTrace();
+            }
+        }
+
+        /**
+         * Nothing for us to do here
+         */
+        public void done() {
+        }
+    }
+
+    /**
+     * The client-side object which pops up a window with a
+     * JProgressBar. Accepts connections from the workers, and then disconnects
+     * them immediately. Beware, the connection backlog of the ServerSocket
+     * might be insufficient.
+     */
+    private static class ProgServer implements Runnable, ProgThing {
+        private JFrame fFrame;
+        private JProgressBar fBar;
+        private ServerSocket fSocket;
+        private int fValue, fN, fStep;
+        private String title;
+        private Thread fThread;
+        private AtomicBoolean fKeepGoing;
+
+        private ProgServer( String s, int N, int progressStepSize, int width, int height ) throws IOException {
+            // The UI
+            fFrame = new JFrame( s );
+            fBar = new JProgressBar( 0, N );
+            fFrame.getContentPane().add( fBar );
+            fFrame.pack();
+            fFrame.setSize(width,height);
+            fFrame.setLocationRelativeTo( null );
+            fFrame.setVisible( true );
+
+            // How far we are through - requires synchronized access
+            fValue = 0;
+            fN = N;
+            fStep = progressStepSize;
+            title = s;
+
+            try {
+                // Get an anonymous port
+                fSocket = new ServerSocket( 0 );
+                // Set SO_TIMEOUT so that we don't block forever
+                fSocket.setSoTimeout( 100 );
+            } catch( IOException e ) {
+                // The window is already visible here; without this cleanup a
+                // failed construction would leave it open with no owner.
+                if( fSocket != null ) {
+                    try {
+                        fSocket.close();
+                    } catch( IOException ignored ) {
+                        // the original exception is the one worth reporting
+                    }
+                }
+                fFrame.dispose();
+                throw e;
+            }
+
+            // Our background thread
+            fThread = new Thread( this );
+            fThread.setDaemon( true );
+
+            // Used to indicate to fThread when it's time to go
+            fKeepGoing = new AtomicBoolean( true );
+        }
+
+        /**
+         * Don't start the Thread in the constructor
+         */
+        public void start() { fThread.start(); }
+
+        /**
+         * Loop over accepting connections and updating
+         */
+        public void run() {
+            while( fKeepGoing.get() ) {
+                try {
+                    acceptAndIncrement();
+                } catch( Exception e ) {
+                    // Only report failures while still running; once done() has
+                    // cleared the flag, errors from the closed socket are dropped.
+                    if( fKeepGoing.get() ) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        }
+
+        /**
+         * If there's a connection - accept and then disconnect; increment our count.
+         */
+        private void acceptAndIncrement() throws IOException {
+            Socket worker;
+            try {
+                worker = fSocket.accept();
+            } catch( SocketTimeoutException timeout ) {
+                // don't care about timeouts
+                return;
+            }
+            try {
+                worker.close();
+            } finally {
+                // The accepted connection is the progress signal, so count it
+                // even when closing the socket fails.
+                increment();
+            }
+        }
+
+
+        /**
+         * On the EDT, update the progress bar
+         */
+        private void updateBar( final int newVal ) {
+            SwingUtilities.invokeLater( new Runnable() {
+                public void run() {
+                    // Widen before multiplying so step * count cannot wrap around
+                    long completed = (long)fStep * newVal;
+                    int maximum = fBar.getMaximum();
+                    fBar.setValue( (int)Math.max( 0L, Math.min( completed, (long)maximum ) ) );
+                    // Cap the figure at 100 (step * count can exceed N) and
+                    // avoid dividing by zero when N is 0.
+                    long percentage = ( fN > 0 ) ? Math.min( 100L, 100L*completed/fN ) : 100L;
+                    fFrame.setTitle(title + percentage + "% completed.");
+                    if ( completed >= maximum ) {
+                        done();
+                    }
+                }
+            } );
+        }
+
+        /**
+         * M-code needs to know which port we got
+         */
+        public int getPort() {
+            return ((InetSocketAddress)fSocket.getLocalSocketAddress()).getPort();
+        }
+
+        /**
+         * Provide public access to this for pool-close PARFORs
+         */
+        public synchronized void increment() {
+            fValue++;
+            updateBar( fValue );
+        }
+
+        /**
+         * Shut it all down
+         */
+        public void done() {
+            fKeepGoing.set( false );
+            try {
+                fSocket.close();
+            } catch( Exception e ) {
+                e.printStackTrace();
+            }
+            fFrame.dispose();
+        }
+    }
+
+    /** This class isn't useful - use the static methods */
+    private ParforProgressMonitor() {}
+}
\ No newline at end of file
diff --git a/Common Files/Parallel Progress Bar/license.txt b/Common Files/Parallel Progress Bar/license.txt
new file mode 100644
index 0000000..68eb73a
--- /dev/null
+++ b/Common Files/Parallel Progress Bar/license.txt
@@ -0,0 +1,29 @@
+Copyright (c) 2016, Dylan Muir
+Copyright (c) 2011, Willem-Jan de Goeij
+Copyright (c) 2009, The MathWorks, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution + * Neither the name of the The MathWorks, Inc. nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Common Files/Parallel Progress Bar/progress_bar.png b/Common Files/Parallel Progress Bar/progress_bar.png
new file mode 100644
index 0000000..8fc476c
Binary files /dev/null and b/Common Files/Parallel Progress Bar/progress_bar.png differ
diff --git a/Common Files/PrepareData.m b/Common Files/PrepareData.m
new file mode 100644
index 0000000..31adfad
--- /dev/null
+++ b/Common Files/PrepareData.m
@@ -0,0 +1,61 @@
+function [ SegVector, LatLon ] = PrepareData(O3Data, Lat, Lon)
+%PREPAREDATA Cut a gridded model stack into square neighbourhood segments.
+%   [SegVector, LatLon] = PrepareData(O3Data, Lat, Lon) extracts, for each
+%   interior grid point, the DimSize x DimSize window centred on it from
+%   every model in O3Data (DimSize = 2*GeogSlice+1 with GeogSlice = 2).
+%
+%   Inputs:
+%     O3Data - 3-D array indexed as (model, longitude index, latitude index)
+%     Lat    - latitude coordinates; only Lat(1) and Lat(2) are read
+%     Lon    - longitude coordinates; only Lon(1) and Lon(2) are read
+%
+%   Outputs:
+%     SegVector - array of size [nSeg, nModels, DimSize, DimSize], where
+%                 nSeg = (nLat-GeogSlice)*(nLon-GeogSlice)
+%     LatLon    - [nModels*DimSize^2, 2] array of [lon, lat] window
+%                 offsets (multiples of the grid spacing), tiled per model
+
+fprintf('Creating segments....')
+
+GeogSlice = 2;              % half-width of a window, in grid cells
+DimSize = 2*GeogSlice+1;    % full width of a window
+
+% Take the grid sizes from the data rather than assuming 7 x 700 x 400;
+% an input of exactly that size produces the same result as before.
+NumModels = size(O3Data, 1);
+NumLon = size(O3Data, 2);
+NumLat = size(O3Data, 3);
+
+if NumLon < DimSize || NumLat < DimSize
+    error('PrepareData:gridTooSmall', ...
+        'O3Data needs at least %d points along longitude and latitude.', DimSize);
+end
+
+% NOTE(review): rows/columns 1..GeogSlice of SegLatLon are never written
+% (the loops start at GeogSlice+1), so they remain zero and show up as
+% all-zero segments in SegVector. Left unchanged so the output size stays
+% the same for existing callers - confirm whether the padding is intended.
+SegLatLon = zeros(NumLat-GeogSlice, NumLon-GeogSlice, NumModels, DimSize, DimSize);
+
+for idxLat = GeogSlice+1:NumLat-GeogSlice
+    for idxLon = GeogSlice+1:NumLon-GeogSlice
+        SegLatLon(idxLat, idxLon, :, :, :) =...
+            O3Data(:, idxLon-GeogSlice:idxLon+GeogSlice, idxLat-GeogSlice:idxLat+GeogSlice);
+    end
+end
+
+fprintf('Segments created\n')
+
+% Collapse the (lat, lon) grid into one leading dimension
+SegVector = reshape(SegLatLon,[],NumModels,DimSize,DimSize);
+
+% Window coordinates expressed as multiples of the grid spacing
+LatSpace = abs(Lat(2)-Lat(1));
+LatList = (1:DimSize)*LatSpace;
+LonSpace = abs(Lon(2)-Lon(1));
+LonList = (1:DimSize)*LonSpace;
+[X, Y] = meshgrid(LonList,LatList);
+LatLon = repmat([X(:),Y(:)], NumModels, 1);
+
+end
+