diff --git a/Assessment/5011CEM Report.docx b/Assessment/5011CEM Report.docx new file mode 100644 index 0000000..550e656 Binary files /dev/null and b/Assessment/5011CEM Report.docx differ diff --git a/Assessment/5011CEMResit.jpg b/Assessment/5011CEMResit.jpg new file mode 100644 index 0000000..83d0955 Binary files /dev/null and b/Assessment/5011CEMResit.jpg differ diff --git a/Assessment/DDC_ver01_1_CAMS.m b/Assessment/DDC_ver01_1_CAMS.m new file mode 100644 index 0000000..4369583 --- /dev/null +++ b/Assessment/DDC_ver01_1_CAMS.m @@ -0,0 +1,214 @@ +function [ Clusters, Results ] = DDC_ver01_1_CAMS( varargin ) +%DDC_VER01.1 Data Density Based Clustering +% Copyright R Hyde 2017 +% Released under the GNU GPLver3.0 +% You should have received a copy of the GNU General Public License +% along with this program. If not, see 0 + % size(DataIn,1) % uncomment to trace remaining data + NumClusters=NumClusters+1; + Clusters.Rad(NumClusters,:)=InitR; + %% Find Cluster Centre + Glob_Mean=mean(DataIn,1); % array of means of data dim + Glob_Scalar=sum(sum((DataIn.*DataIn),2),1)/size(DataIn,1); % array of scalar products for each data dim + % full calculations +% GDensity=1./(1+(pdist2(DataIn,Glob_Mean,'euclidean').^2)+Glob_Scalar-(sum(Glob_Mean.^2))); % calculate global densities +% [~, CentreIndex]=max(GDensity); % find index of max densest point + % slim calculations + GDensity=pdist2(DataIn,Glob_Mean,'euclidean').^2 + Glob_Scalar - sum(Glob_Mean.^2); % calculate global densities + [~, CentreIndex]=min(GDensity); % find index of max densest point + + %% Find points belonging to cluster + Include=bsxfun(@minus,DataIn,DataIn(CentreIndex,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(DataIn(Include,:),DataIn(CentreIndex,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Move cluster centre to local densest point + LocMean=mean(DataIn(Include,:),1); + LocScalar=sum((DataIn(Include,:).^2),2)/size(Include,1); % array of scalar products of data dims + % full calculations +% LocDens=1./(1+(pdist2(DataIn(Include,:),LocMean,'euclidean').^2)+LocScalar-(sum(LocMean.^2))); % calculate local densities +% [~,CentreIndex]=max(LocDens); + % slim calculations + LocDens=pdist2(DataIn(Include,:),LocMean,'euclidean').^2 + LocScalar - sum(LocMean.^2); % calculate local densities + [~,CentreIndex]=min(LocDens); + CentreIndex=Include(CentreIndex); + Clusters.Centre(NumClusters,:)=DataIn(CentreIndex,:); % assign cluster centre + + %% Assign data to new centre + Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Update radii to maximum distances + for idx=1:size(DataIn,2) + value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean'); + if max(value01)>0 + Clusters.Rad(NumClusters,idx)=max(value01); + end + end + + %% Assign data to cluster based 
on new radii + Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre + RadSq=Clusters.Rad(NumClusters,:).^2; % square radii + Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms + Include=find(Include<1); + + %% Remove outliers >3sigma + Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members + Include=Include(abs(Dist - mean(Dist) <= 3*std(Dist))==1,:); % keep only indices of samples with 3 sigma + + %% Update radii to maximum distances + + for idx=1:size(DataIn,2) + value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean'); + if max(value01)>0 + Clusters.Rad(NumClusters,idx)=max(value01); + else +% Clusters.Rad(NumClusters,idx)=DefaultRadii(idx); + end + end + + %% Plot + if Verbose==1 + hold off;scatter(DataIn(:,1),DataIn(:,2));hold on + scatter(DataIn(CentreIndex,1),DataIn(CentreIndex,2),'r') + scatter(DataIn(Include,1),DataIn(Include,2),'g'); + scatter(Clusters.Centre(NumClusters,1),Clusters.Centre(NumClusters,2),'*','r') + title(sprintf('Clustered: %i, Remaining: %i',size(Results,1)-size(DataIn,1), size(DataIn,1))) + axis([0 1 0 1]) + drawnow + for zz=1:size(Clusters.Centre,1) + rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1), Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1), 2*Clusters.Rad(zz,2)],'Curvature',[1,1]) + end + end + %% Assign data to final clusters + StartIdx=find(all(Results==0,2),1,'first'); + EndIdx=StartIdx+size(Include,1)-1; + Results(StartIdx:EndIdx,:)=[DataIn(Include,:),ones(size(Include,1),1)*NumClusters]; + DataIn(Include,:)=[]; % remove clustered data +end + +%% Merge clusters if centre is within another cluster +if Merge==1 +MergeAny=1; + while MergeAny==1 + if Verbose==1 + figure(2) + clf + for zz=1:size(Clusters.Centre,1) + rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1),... + Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1),... 
+ 2*Clusters.Rad(zz,2)],'Curvature',[1,1]) + end + hold on + scatter(Clusters.Centre(:,1),Clusters.Centre(:,2),'*','r') + drawnow + end + + MergeAny=0; + Merges=[]; + % for each cluster & find if cluster centre is within other clusters + for idx1=1:size(Clusters.Centre,1); + InEll=bsxfun(@minus,Clusters.Centre,Clusters.Centre(idx1,1:end)).^2; + InEll=sum(bsxfun(@rdivide,InEll,Clusters.Rad(idx1,:).^2),2); % divide by rad^2 & add + InEll=(InEll<1); + Merges(idx1,:)=InEll.'; + end + Merges(logical(eye(size(Merges))))=0; + % Merge clusters + for idx=1:size(Clusters.Centre,1) + [~,idx1]=find(Merges(idx,:),1); + Results(ismember(Results(:,end),idx1),end)=idx; + if idx1 + MergeAny=1; + end + end + %% renumber clusters + [C,~,ic]=unique(Results(:,end)); + C=1:size(C,1); + Results(:,end)=C(ic); + %% Re-create cluster data + Clusters.Centre=[]; + Clusters.Rad=[]; + for idx1=1:max(Results(:,end)) + Clusters.Centre(idx1,:)=mean(Results(Results(:,3)==idx1,1:end-1),1); + for idx2=1:size(Results,2)-1 + value01=pdist2(Results(Results(:,3)==idx1,idx2),Clusters.Centre(idx1,idx2),'Euclidean'); + if max(value01)>0 + Clusters.Rad(idx1,idx2)=max(value01); + else + Clusters.Rad(idx1,idx2)=0; + end + end + end + + end +end + +end % end function \ No newline at end of file diff --git a/Assessment/EnsembleValue.m b/Assessment/EnsembleValue.m new file mode 100644 index 0000000..9fde6df --- /dev/null +++ b/Assessment/EnsembleValue.m @@ -0,0 +1,13 @@ +function EV = EnsembleValue(Data, LatLon, RadLat, RadLon, RadO3) + +%ENSEMBLEVALUE Summary of this function goes here +% Detailed explanation goes here + + +Data4Cluster = [Data(:),LatLon]; +[Clusters, Results] = DDC_ver01_1_CAMS(Data4Cluster, [RadLat, RadLon, RadO3], 0, 0); +MostCommonCluster = mode(Results(:,end)); +EV = Clusters.Centre(MostCommonCluster); + +end + diff --git a/Assessment/GraphAutomation.m b/Assessment/GraphAutomation.m new file mode 100644 index 0000000..6444a89 --- /dev/null +++ b/Assessment/GraphAutomation.m @@ -0,0 +1,33 @@ +function [] = GraphAutomation(dataSize,x1Vals,y1Vals,x2Vals,y2Vals,x3Vals,y3Vals) + +figure('Name','Processing Speeds Per Processors','NumberTitle','off'); +yyaxis left +plot(x1Vals, y1Vals, '-bd') +hold on +ylabel('Processing time (s)') +yyaxis right +plot(x2Vals, y2Vals, '-rx') +hold on +plot(x3Vals, y3Vals, '-gs') +xlabel('Number of Processors') +ylabel('Processing time (s)') +title('Processing time vs number of processors') +legend(num2str(dataSize(1)), num2str(dataSize(2)), num2str(dataSize(3))) +saveas(gcf,'Processing Times.png') + +%% Mean processing time +y1MeanVals = y1Vals / dataSize(1); +y2MeanVals = y2Vals / dataSize(2); +y3MeanVals = y3Vals / dataSize(3); +figure('Name','Processing Mean Time Per Processors','NumberTitle','off'); +plot(x1Vals, y1MeanVals, '-bd') +hold on +plot(x2Vals, y2MeanVals, '-rx') +hold on +plot(x3Vals, y3MeanVals, '-gs') +xlabel('Number of Processors') +ylabel('Processing time (s)') +title('Mean Processing time vs number of processors') +legend(num2str(dataSize(1)), num2str(dataSize(2)), num2str(dataSize(3))) +saveas(gcf,'Mean Processing Times.png') +end \ No newline at end of file diff --git a/Assessment/Graphing Flowchart.jpg b/Assessment/Graphing Flowchart.jpg new file mode 100644 index 0000000..5badff6 Binary files /dev/null and b/Assessment/Graphing Flowchart.jpg differ diff --git a/Assessment/LogBook.numbers b/Assessment/LogBook.numbers new file mode 100644 index 0000000..b3832b3 Binary files /dev/null and b/Assessment/LogBook.numbers differ diff --git a/Assessment/Main Process 
Flowchart.jpg b/Assessment/Main Process Flowchart.jpg new file mode 100644 index 0000000..2c5fed1 Binary files /dev/null and b/Assessment/Main Process Flowchart.jpg differ diff --git a/Assessment/Main.asv b/Assessment/Main.asv new file mode 100644 index 0000000..e8519ee --- /dev/null +++ b/Assessment/Main.asv @@ -0,0 +1,28 @@ +addpath '/Users/youssef/Desktop/5011CEM Resit/Common Files' +FileName = '/Users/youssef/Desktop/5011CEM Resit/NC Files/o3_surface_20180701000000.nc'; %%%Insert File Name to be run +DataSizes = [8000, 10000, 12000]; %%%Insert the amount of data you would like to process from the model per hour +LogFileName = 'ProcessingResults.txt'; +LogID = fopen('ProcessingResults.txt', 'a'); +Processing_Times = []; +Processors = [0, 1, 2, 3, 4, 5, 6, 7, 8]; +if TextScriptTest(FileName) == 1 + fprintf('Script Contains Text Errors: Refer to TextTestLog.txt for more info\n') + return +else + fprintf('Tests Successful! \n Processing initiated\n') + for idx = 1:length(DataSizes) + fprintf(LogID, '%s: Processing Data of Size %i s\n\n', datestr(now, 0), DataSizes(idx)); + Processing_Times(idx, 1) = SequentialAutomation(DataSizes(idx),FileName); + fprintf(LogID, '%s: Total time for processing with %i of data sequentially = %.2f s\n\n', datestr(now, 0), DataSizes(idx), Processing_Times(idx, 1)); + for workers = 1:8 + Processing_Times(idx, workers+1) = ParallelAutomation(workers, DataSizes(idx),FileName); + fprintf(LogID, '%s: Total time for processing with %i of data and %i workers = %.2f s\n\n', datestr(now, 0), DataSizes(idx), workers, Processing_Times(idx, workers+1)); + end + end + for element = 1:numel(Processing_Times) + Processing_Times(element) = (Processing_Times(element)/3)*25; + end + GraphAutomation(DataSizes, Processors, Processing_Times(1, :), Processors, Processing_Times(2, :), Processors, Processing_Times(3, :)); + fprintf('Processing successful. Please refer to ProcessingResults.txt for information') +end +fclose(LogID); \ No newline at end of file diff --git a/Assessment/Main.m b/Assessment/Main.m new file mode 100644 index 0000000..2414923 --- /dev/null +++ b/Assessment/Main.m @@ -0,0 +1,31 @@ +addpath '/Users/youssef/Desktop/5011CEM Resit/Common Files' +FileName = '/Users/youssef/Desktop/5011CEM Resit/NC Files/o3_surface_20180701000000.nc'; %%%Insert File Name to be run +DataSizes = [10, 20, 30]; %%%Insert the amount of data you would like to process from the model per hour +LogFileName = 'ProcessingResults.txt'; +LogID = fopen('ProcessingResults.txt', 'a'); +Processing_Times = []; +Processors = [0, 1, 2, 3, 4, 5, 6, 7, 8]; +Contents = ncinfo(FileName); +Lat = ncread(FileName, 'lat'); % load the latitude locations +Lon = ncread(FileName, 'lon'); % load the longitude locations +if TextScriptTest(FileName) == 1 + fprintf('Script Contains Text Errors: Refer to TextTestLog.txt for more info\n') + return +else + fprintf('Tests Successful! \n Processing initiated\n') + for idx = 1:length(DataSizes) + fprintf(LogID, '%s: Processing Data of Size %i s\n\n', datestr(now, 0), DataSizes(idx)); + Processing_Times(idx, 1) = SequentialAutomation(DataSizes(idx),FileName, Contents, Lat, Lon); + fprintf(LogID, '%s: Total time for processing with %i of data sequentially = %.2f s\n\n', datestr(now, 0), DataSizes(idx), Processing_Times(idx, 1)); + for workers = 1:8 + Processing_Times(idx, workers+1) = ParallelAutomation(workers, DataSizes(idx),FileName, Contents, Lat, Lon); + fprintf(LogID, '%s: Total time for processing with %i of data and %i workers = %.2f s\n\n', datestr(now, 0), DataSizes(idx), workers, Processing_Times(idx, workers+1)); + end + end + for element = 1:numel(Processing_Times) + Processing_Times(element) = (Processing_Times(element)/3)*25; + end + GraphAutomation(DataSizes, Processors, Processing_Times(1, :), Processors, Processing_Times(2, :), Processors, Processing_Times(3, :)); + fprintf('Processing successful. Please refer to ProcessingResults.txt for information') +end +fclose(LogID); \ No newline at end of file diff --git a/Assessment/Mean Processing Time Per Processors Perfect Graph.fig b/Assessment/Mean Processing Time Per Processors Perfect Graph.fig new file mode 100644 index 0000000..89f41c8 Binary files /dev/null and b/Assessment/Mean Processing Time Per Processors Perfect Graph.fig differ diff --git a/Assessment/Mean Processing Time Per Processors Perfect Graph.jpg b/Assessment/Mean Processing Time Per Processors Perfect Graph.jpg new file mode 100644 index 0000000..8807566 Binary files /dev/null and b/Assessment/Mean Processing Time Per Processors Perfect Graph.jpg differ diff --git a/Assessment/NaNScriptTest.m b/Assessment/NaNScriptTest.m new file mode 100644 index 0000000..27d976f --- /dev/null +++ b/Assessment/NaNScriptTest.m @@ -0,0 +1,39 @@ +function [NaNErrors] = NaNScriptTest(FileName) +%NANSCRIPTTEST Check every hour of the NC file for NaN values in the model data +% Returns NaNErrors = 1 if any NaN values are found (0 otherwise) and logs the result to NanTestLog.txt +%% Test File with Errors +NaNErrors = 0; +%% Set file to test; +Contents = ncinfo(FileName); % Store the file content information in a variable. +LogFileName = 'NanTestLog.txt'; +LogID = fopen('NanTestLog.txt', 'w'); +fprintf(LogID, '%s: Looking for NaN data in %s.. \n', datestr(now, 0), FileName); +StartLat = 1; +StartLon = 1; + +fprintf('Testing files: %s\n', FileName) +for idxHour = 1:25 + + for idxModel = 1:8 + Data(idxModel,:,:) = ncread(FileName, Contents.Variables(idxModel).Name,...
+ [StartLat, StartLon, idxHour], [inf, inf, 1]); + end + + % check for NaNs + if any(isnan(Data), 'All') + fprintf('NaNs present during hour %i\n', idxHour) + NaNErrors = 1; + end +end +fprintf('Testing for NaN errors in file: %s\n', FileName) +fprintf(LogID, 'Testing files: %s\n', FileName); +if NaNErrors + fprintf('NaN errors present!\n') + fprintf(LogID, '%s: NaN errors present!\n', datestr(now, 0)); +else + fprintf('No errors!\n') + fprintf(LogID, '%s: No errors!\n', datestr(now, 0)); +end +fclose(LogID); +end + diff --git a/Assessment/PHOTO-2021-11-24-21-27-34.jpg b/Assessment/PHOTO-2021-11-24-21-27-34.jpg new file mode 100644 index 0000000..53a4de2 Binary files /dev/null and b/Assessment/PHOTO-2021-11-24-21-27-34.jpg differ diff --git a/Assessment/Parallel Processing Flowchart.jpg b/Assessment/Parallel Processing Flowchart.jpg new file mode 100644 index 0000000..2c1af22 Binary files /dev/null and b/Assessment/Parallel Processing Flowchart.jpg differ diff --git a/Assessment/ParallelAutomation.asv b/Assessment/ParallelAutomation.asv new file mode 100644 index 0000000..9141132 --- /dev/null +++ b/Assessment/ParallelAutomation.asv @@ -0,0 +1,91 @@ +function tSeq = ParallelAutomation(Workers, DataSize,FileName, Contents, Lat, Lon) + +NumHours = 3; +%% 2: Processing parameters +% ## provided by customer ## +RadLat = 30.2016; +RadLon = 24.8032; +RadO3 = 4.2653986e-08; +NanErrors = 0; +StartLat = 1; +NumLat = 400; +StartLon = 1; +NumLon = 700; +%% 3: Pre-allocate output array memory +% the '-4' value is due to the analysis method resulting in fewer output +% values than the input array. +NumLocations = (NumLon - 4) * (NumLat - 4); +EnsembleVectorPar = zeros(NumLocations, NumHours); % pre-allocate memory + +%% 4: Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'parallel processing' will process the data location one +% after the other, reporting on the time involved. +tic +for idxTime = 1:NumHours + + %% 5: Load the data for each hour + % Each hour we read the data from the required models, defined by the + % index variable. Each model data are placed on a 'layer' of the 3D + % array resulting in a 7 x 700 x 400 array. + % We do this by indexing through the model names, then defining the + % start position as the beginnning of the Lat, beginning of the Lon and + % beginning of the new hour. We then define the number of elements + % along each data dimension, so the total number of Lat, the total + % number of Lon, but only 1 hour. + % You can use these values to select a smaller sub-set of the data if + % required to speed up testing o fthe functionality. + + DataLayer = 1; + for idx = [1, 2, 4, 5, 6, 7, 8] + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, idxTime], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; + end + if any(isnan(HourlyData), 'All') + fprintf('NaNs present during hour %i\n Skipping Hour\n', idxTime) + continue + end + %% 6: Pre-process the data for parallel processing + % This takes the 3D array of data [model, lat, lon] and generates the + % data required to be processed at each location. + % ## This process is defined by the customer ## + % If you want to know the details, please ask, but this is not required + % for the module or assessment. 
+ [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + +%% Parallel Analysis + %% 7: Create the parallel pool and attache files for use + PoolSize = Workers ; % define the number of processors to use in parallel + if isempty(gcp('nocreate')) + parpool('LocalProfile1',Workers); + end + poolobj = gcp; + % attaching a file allows it to be available at each processor without + % passing the file each time. This speeds up the process. For more + % information, ask your tutor. + addAttachedFiles(poolobj,{'EnsembleValue'}); + + %% 9: The actual parallel processing! + % Ensemble value is a function defined by the customer to calculate the + % ensemble value at each location. Understanding this function is not + % required for the module or the assessment, but it is the reason for + % this being a 'big data' project due to the processing time (not the + % pure volume of raw data alone). + T4 = toc; + parfor idx = 1: DataSize % size(Data2Process,1) + [EnsembleVectorPar(idx, idxTime)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + end + + T3(idxTime) = toc - T4; % record the parallel processing time for this hour of data + fprintf('Parallel processing time for hour %i : %.1f s\n', idxTime, T3(idxTime)) + +end % end time loop +T2 = toc; +delete(gcp); +%% 10: Reshape ensemble values to Lat, lon, hour format +EnsembleVectorPar = reshape(EnsembleVectorPar, 696, 396, []); +fprintf('Total processing time for %i workers = %.2f s\n', PoolSize, sum(T3)); +tSeq = sum(T3); +end \ No newline at end of file diff --git a/Assessment/ParallelAutomation.m b/Assessment/ParallelAutomation.m new file mode 100644 index 0000000..82e9fd7 --- /dev/null +++ b/Assessment/ParallelAutomation.m @@ -0,0 +1,90 @@ +function tSeq = ParallelAutomation(Workers, DataSize,FileName, Contents, Lat, Lon) +NumHours = 3; +%% 2: Processing parameters +% ## provided by customer ## +RadLat = 30.2016; +RadLon = 24.8032; +RadO3 = 4.2653986e-08; +NanErrors = 0; +StartLat = 1; +NumLat = 400; +StartLon = 1; +NumLon = 700; +%% 3: Pre-allocate output array memory +% the '-4' value is due to the analysis method resulting in fewer output +% values than the input array. +NumLocations = (NumLon - 4) * (NumLat - 4); +EnsembleVectorPar = zeros(NumLocations, NumHours); % pre-allocate memory + +%% 4: Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'parallel processing' will process the data location one +% after the other, reporting on the time involved. +tic +for idxTime = 1:NumHours + + %% 5: Load the data for each hour + % Each hour we read the data from the required models, defined by the + % index variable. Each model data are placed on a 'layer' of the 3D + % array resulting in a 7 x 700 x 400 array. + % We do this by indexing through the model names, then defining the + % start position as the beginnning of the Lat, beginning of the Lon and + % beginning of the new hour. We then define the number of elements + % along each data dimension, so the total number of Lat, the total + % number of Lon, but only 1 hour. + % You can use these values to select a smaller sub-set of the data if + % required to speed up testing o fthe functionality. + + DataLayer = 1; + for idx = [1, 2, 4, 5, 6, 7, 8] + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... 
+ [StartLon, StartLat, idxTime], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; + end + if any(isnan(HourlyData), 'All') + fprintf('NaNs present during hour %i\n Skipping Hour\n', idxTime) + continue + end + %% 6: Pre-process the data for parallel processing + % This takes the 3D array of data [model, lat, lon] and generates the + % data required to be processed at each location. + % ## This process is defined by the customer ## + % If you want to know the details, please ask, but this is not required + % for the module or assessment. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + +%% Parallel Analysis + %% 7: Create the parallel pool and attache files for use + PoolSize = Workers ; % define the number of processors to use in parallel + if isempty(gcp('nocreate')) + parpool('LocalProfile1',Workers); + end + poolobj = gcp; + % attaching a file allows it to be available at each processor without + % passing the file each time. This speeds up the process. For more + % information, ask your tutor. + addAttachedFiles(poolobj,{'EnsembleValue'}); + + %% 9: The actual parallel processing! + % Ensemble value is a function defined by the customer to calculate the + % ensemble value at each location. Understanding this function is not + % required for the module or the assessment, but it is the reason for + % this being a 'big data' project due to the processing time (not the + % pure volume of raw data alone). + T4 = toc; + parfor idx = 1: DataSize % size(Data2Process,1) + [EnsembleVectorPar(idx, idxTime)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + end + + T3(idxTime) = toc - T4; % record the parallel processing time for this hour of data + fprintf('Parallel processing time for hour %i : %.1f s\n', idxTime, T3(idxTime)) + +end % end time loop +T2 = toc; +delete(gcp); +%% 10: Reshape ensemble values to Lat, lon, hour format +EnsembleVectorPar = reshape(EnsembleVectorPar, 696, 396, []); +fprintf('Total processing time for %i workers = %.2f s\n', PoolSize, sum(T3)); +tSeq = sum(T3); +end \ No newline at end of file diff --git a/Assessment/PrepareData.m b/Assessment/PrepareData.m new file mode 100644 index 0000000..31adfad --- /dev/null +++ b/Assessment/PrepareData.m @@ -0,0 +1,34 @@ +function [ SegVector, LatLon ] = PrepareData(O3Data, Lat, Lon) +%UNTITLED2 Summary of this function goes here +% Detailed explanation goes here + +fprintf('Creating segments....') + +GeogSlice = 2; +DimSize = 2*GeogSlice+1; + +% tic +SegLatLon = zeros(400-GeogSlice, 700-GeogSlice,7,2*GeogSlice+1,2*GeogSlice+1); +idxSeg = 0; + + +for idxLat = GeogSlice+1:400-GeogSlice +% idxLat + for idxLon = GeogSlice+1:700-GeogSlice + SegLatLon(idxLat, idxLon, :, :, :) =... 
+ O3Data(:, idxLon-GeogSlice:idxLon+GeogSlice, idxLat-GeogSlice:idxLat+GeogSlice); + end +end + +fprintf('Segments created\n') + +SegVector = reshape(SegLatLon,[],7,DimSize,DimSize); +LatSpace = abs(Lat(2)-Lat(1)); +LatList = [1:DimSize]*LatSpace; +LonSpace = abs(Lon(2)-Lon(1)); +LonList = [1:DimSize]*LonSpace; +[X, Y] = meshgrid(LonList,LatList); +LatLon = repmat([X(:),Y(:)], 7, 1); + +end + diff --git a/Assessment/Processing Mean Time Per Processors V1.fig b/Assessment/Processing Mean Time Per Processors V1.fig new file mode 100644 index 0000000..2c0d3ec Binary files /dev/null and b/Assessment/Processing Mean Time Per Processors V1.fig differ diff --git a/Assessment/Processing Speeds Per Processors Perfect Graph.fig b/Assessment/Processing Speeds Per Processors Perfect Graph.fig new file mode 100644 index 0000000..303e07d Binary files /dev/null and b/Assessment/Processing Speeds Per Processors Perfect Graph.fig differ diff --git a/Assessment/Processing Speeds Per Processors Perfect Graph.jpg b/Assessment/Processing Speeds Per Processors Perfect Graph.jpg new file mode 100644 index 0000000..7e535a1 Binary files /dev/null and b/Assessment/Processing Speeds Per Processors Perfect Graph.jpg differ diff --git a/Assessment/Processing Speeds Per Processors.fig b/Assessment/Processing Speeds Per Processors.fig new file mode 100644 index 0000000..fca0627 Binary files /dev/null and b/Assessment/Processing Speeds Per Processors.fig differ diff --git a/Assessment/ProcessingResults.txt b/Assessment/ProcessingResults.txt new file mode 100644 index 0000000..aa75b12 --- /dev/null +++ b/Assessment/ProcessingResults.txt @@ -0,0 +1,363 @@ +28-Nov-2021 04:49:14: Processing Data of Size 10 s + +28-Nov-2021 04:49:54: Processing Data of Size 10 s + +28-Nov-2021 04:50:03: Total time for processing with 10 of data sequentialy = 8.14 s + +28-Nov-2021 04:50:43: Total time for processing with 10 of data and 1 workers = 2.16 s + +28-Nov-2021 04:51:12: Total time for processing with 10 of data and 2 workers = 1.46 s + +28-Nov-2021 04:51:40: Total time for processing with 10 of data and 3 workers = 1.41 s + +28-Nov-2021 04:52:11: Total time for processing with 10 of data and 4 workers = 1.81 s + +28-Nov-2021 04:52:47: Total time for processing with 10 of data and 5 workers = 1.71 s + +28-Nov-2021 04:53:25: Total time for processing with 10 of data and 6 workers = 1.78 s + +28-Nov-2021 04:54:07: Total time for processing with 10 of data and 7 workers = 2.08 s + +28-Nov-2021 04:54:56: Total time for processing with 10 of data and 8 workers = 2.45 s + +28-Nov-2021 04:54:56: Processing Data of Size 20 s + +28-Nov-2021 04:55:04: Total time for processing with 20 of data sequentialy = 8.24 s + +28-Nov-2021 04:55:28: Total time for processing with 20 of data and 1 workers = 1.56 s + +28-Nov-2021 04:55:53: Total time for processing with 20 of data and 2 workers = 1.94 s + +28-Nov-2021 04:56:23: Total time for processing with 20 of data and 3 workers = 2.06 s + +28-Nov-2021 04:56:56: Total time for processing with 20 of data and 4 workers = 2.19 s + +28-Nov-2021 04:57:32: Total time for processing with 20 of data and 5 workers = 2.43 s + +28-Nov-2021 04:58:11: Total time for processing with 20 of data and 6 workers = 2.41 s + +28-Nov-2021 04:58:56: Total time for processing with 20 of data and 7 workers = 2.91 s + +28-Nov-2021 04:59:45: Total time for processing with 20 of data and 8 workers = 2.98 s + +28-Nov-2021 04:59:45: Processing Data of Size 30 s + +28-Nov-2021 04:59:53: Total time for processing with 30 of data 
sequentialy = 8.03 s + +28-Nov-2021 05:00:17: Total time for processing with 30 of data and 1 workers = 2.22 s + +28-Nov-2021 05:10:51: Total time for processing with 30 of data and 2 workers = 3.27 s + +28-Nov-2021 05:11:24: Total time for processing with 30 of data and 3 workers = 2.92 s + +28-Nov-2021 05:11:57: Total time for processing with 30 of data and 4 workers = 2.96 s + +28-Nov-2021 05:12:36: Total time for processing with 30 of data and 5 workers = 3.07 s + +28-Nov-2021 05:13:18: Total time for processing with 30 of data and 6 workers = 3.39 s + +28-Nov-2021 05:14:06: Total time for processing with 30 of data and 7 workers = 3.59 s + +28-Nov-2021 05:14:57: Total time for processing with 30 of data and 8 workers = 3.78 s + +28-Nov-2021 05:18:05: Processing Data of Size 250 s + +28-Nov-2021 05:18:21: Total time for processing with 250 of data sequentialy = 16.48 s + +28-Nov-2021 05:18:54: Total time for processing with 250 of data and 1 workers = 9.81 s + +28-Nov-2021 05:19:25: Total time for processing with 250 of data and 2 workers = 7.75 s + +28-Nov-2021 05:19:59: Total time for processing with 250 of data and 3 workers = 7.24 s + +28-Nov-2021 05:20:39: Total time for processing with 250 of data and 4 workers = 9.39 s + +28-Nov-2021 05:21:22: Total time for processing with 250 of data and 5 workers = 9.96 s + +28-Nov-2021 05:22:12: Total time for processing with 250 of data and 6 workers = 9.71 s + +28-Nov-2021 05:23:07: Total time for processing with 250 of data and 7 workers = 9.84 s + +28-Nov-2021 05:24:08: Total time for processing with 250 of data and 8 workers = 14.09 s + +28-Nov-2021 05:24:08: Processing Data of Size 500 s + +28-Nov-2021 05:24:29: Total time for processing with 500 of data sequentialy = 20.99 s + +28-Nov-2021 05:25:04: Total time for processing with 500 of data and 1 workers = 15.55 s + +28-Nov-2021 05:25:37: Total time for processing with 500 of data and 2 workers = 12.52 s + +28-Nov-2021 05:26:17: Total time for processing with 500 of data and 3 workers = 14.15 s + +28-Nov-2021 05:26:59: Total time for processing with 500 of data and 4 workers = 13.82 s + +28-Nov-2021 05:27:49: Total time for processing with 500 of data and 5 workers = 14.33 s + +28-Nov-2021 05:28:42: Total time for processing with 500 of data and 6 workers = 14.23 s + +28-Nov-2021 05:30:38: Total time for processing with 500 of data and 7 workers = 22.47 s + +28-Nov-2021 05:31:53: Total time for processing with 500 of data and 8 workers = 20.71 s + +28-Nov-2021 05:31:53: Processing Data of Size 1000 s + +28-Nov-2021 05:32:35: Total time for processing with 1000 of data sequentialy = 42.31 s + +28-Nov-2021 05:33:31: Total time for processing with 1000 of data and 1 workers = 35.25 s + +28-Nov-2021 05:34:20: Total time for processing with 1000 of data and 2 workers = 27.17 s + +28-Nov-2021 05:35:12: Total time for processing with 1000 of data and 3 workers = 26.14 s + +28-Nov-2021 05:36:17: Total time for processing with 1000 of data and 4 workers = 34.84 s + +28-Nov-2021 05:37:23: Total time for processing with 1000 of data and 5 workers = 28.94 s + +28-Nov-2021 05:38:30: Total time for processing with 1000 of data and 6 workers = 30.51 s + +28-Nov-2021 05:39:46: Total time for processing with 1000 of data and 7 workers = 34.62 s + +28-Nov-2021 05:41:10: Total time for processing with 1000 of data and 8 workers = 35.56 s + +28-Nov-2021 05:44:25: Processing Data of Size 250 s + +28-Nov-2021 05:44:42: Total time for processing with 250 of data sequentialy = 16.51 s + +28-Nov-2021 
05:45:13: Total time for processing with 250 of data and 1 workers = 9.17 s + +28-Nov-2021 05:45:44: Total time for processing with 250 of data and 2 workers = 7.96 s + +28-Nov-2021 05:46:18: Total time for processing with 250 of data and 3 workers = 7.50 s + +28-Nov-2021 05:46:56: Total time for processing with 250 of data and 4 workers = 8.97 s + +28-Nov-2021 05:58:58: Total time for processing with 250 of data and 5 workers = 14.26 s + +28-Nov-2021 05:59:51: Total time for processing with 250 of data and 6 workers = 9.69 s + +28-Nov-2021 06:00:50: Total time for processing with 250 of data and 7 workers = 10.53 s + +28-Nov-2021 06:01:51: Total time for processing with 250 of data and 8 workers = 13.98 s + +28-Nov-2021 06:01:51: Processing Data of Size 500 s + +28-Nov-2021 06:02:12: Total time for processing with 500 of data sequentialy = 21.63 s + +28-Nov-2021 06:02:48: Total time for processing with 500 of data and 1 workers = 15.84 s + +28-Nov-2021 06:03:21: Total time for processing with 500 of data and 2 workers = 12.57 s + +28-Nov-2021 06:04:07: Total time for processing with 500 of data and 3 workers = 14.34 s + +28-Nov-2021 06:04:49: Total time for processing with 500 of data and 4 workers = 13.48 s + +28-Nov-2021 06:05:36: Total time for processing with 500 of data and 5 workers = 13.95 s + +28-Nov-2021 06:06:29: Total time for processing with 500 of data and 6 workers = 14.14 s + +28-Nov-2021 06:07:27: Total time for processing with 500 of data and 7 workers = 15.10 s + +28-Nov-2021 06:08:32: Total time for processing with 500 of data and 8 workers = 18.96 s + +28-Nov-2021 06:08:32: Processing Data of Size 1000 s + +28-Nov-2021 06:10:38: Total time for processing with 1000 of data sequentialy = 49.32 s + +28-Nov-2021 06:11:33: Total time for processing with 1000 of data and 1 workers = 35.27 s + +28-Nov-2021 06:12:25: Total time for processing with 1000 of data and 2 workers = 27.23 s + +28-Nov-2021 06:13:15: Total time for processing with 1000 of data and 3 workers = 25.66 s + +28-Nov-2021 06:14:10: Total time for processing with 1000 of data and 4 workers = 26.12 s + +28-Nov-2021 06:15:11: Total time for processing with 1000 of data and 5 workers = 28.71 s + +28-Nov-2021 06:16:17: Total time for processing with 1000 of data and 6 workers = 29.55 s + +28-Nov-2021 06:17:32: Total time for processing with 1000 of data and 7 workers = 33.72 s + +28-Nov-2021 06:18:54: Total time for processing with 1000 of data and 8 workers = 35.58 s + +28-Nov-2021 06:22:35: Processing Data of Size 1000 s + +28-Nov-2021 06:23:15: Total time for processing with 1000 of data sequentialy = 39.95 s + +28-Nov-2021 06:24:09: Total time for processing with 1000 of data and 1 workers = 33.65 s + +28-Nov-2021 06:24:56: Total time for processing with 1000 of data and 2 workers = 25.63 s + +28-Nov-2021 06:25:46: Total time for processing with 1000 of data and 3 workers = 25.29 s + +28-Nov-2021 06:26:42: Total time for processing with 1000 of data and 4 workers = 26.38 s + +28-Nov-2021 06:27:46: Total time for processing with 1000 of data and 5 workers = 29.40 s + +28-Nov-2021 06:28:55: Total time for processing with 1000 of data and 6 workers = 30.10 s + +28-Nov-2021 06:30:09: Total time for processing with 1000 of data and 7 workers = 32.09 s + +28-Nov-2021 06:31:27: Total time for processing with 1000 of data and 8 workers = 34.11 s + +28-Nov-2021 06:31:27: Processing Data of Size 2000 s + +28-Nov-2021 06:32:51: Total time for processing with 2000 of data sequentialy = 83.37 s + +28-Nov-2021 06:34:32: 
Total time for processing with 2000 of data and 1 workers = 80.06 s + +28-Nov-2021 06:35:48: Total time for processing with 2000 of data and 2 workers = 55.58 s + +28-Nov-2021 06:37:06: Total time for processing with 2000 of data and 3 workers = 53.78 s + +28-Nov-2021 06:38:27: Total time for processing with 2000 of data and 4 workers = 52.81 s + +28-Nov-2021 06:54:53: Total time for processing with 2000 of data and 5 workers = 68.07 s + +28-Nov-2021 06:56:32: Total time for processing with 2000 of data and 6 workers = 60.32 s + +28-Nov-2021 06:58:19: Total time for processing with 2000 of data and 7 workers = 63.78 s + +28-Nov-2021 07:00:13: Total time for processing with 2000 of data and 8 workers = 67.47 s + +28-Nov-2021 07:00:13: Processing Data of Size 3000 s + +28-Nov-2021 07:02:29: Total time for processing with 3000 of data sequentialy = 135.80 s + +28-Nov-2021 07:04:53: Total time for processing with 3000 of data and 1 workers = 122.21 s + +28-Nov-2021 07:06:51: Total time for processing with 3000 of data and 2 workers = 91.22 s + +28-Nov-2021 07:08:35: Total time for processing with 3000 of data and 3 workers = 79.05 s + +28-Nov-2021 07:10:19: Total time for processing with 3000 of data and 4 workers = 76.33 s + +28-Nov-2021 07:12:13: Total time for processing with 3000 of data and 5 workers = 81.62 s + +28-Nov-2021 07:14:14: Total time for processing with 3000 of data and 6 workers = 84.51 s + +28-Nov-2021 07:16:24: Total time for processing with 3000 of data and 7 workers = 88.46 s + +28-Nov-2021 07:18:40: Total time for processing with 3000 of data and 8 workers = 91.20 s + +28-Nov-2021 18:18:17: Processing Data of Size 5000 s + +28-Nov-2021 18:21:38: Total time for processing with 5000 of data sequentialy = 200.89 s + +28-Nov-2021 18:25:53: Total time for processing with 5000 of data and 1 workers = 223.17 s + +28-Nov-2021 18:28:33: Total time for processing with 5000 of data and 2 workers = 135.83 s + +28-Nov-2021 19:01:26: Total time for processing with 5000 of data and 3 workers = 206.57 s + +28-Nov-2021 19:05:54: Total time for processing with 5000 of data and 4 workers = 230.18 s + +28-Nov-2021 19:10:32: Total time for processing with 5000 of data and 5 workers = 233.60 s + +28-Nov-2021 19:15:17: Total time for processing with 5000 of data and 6 workers = 228.96 s + +28-Nov-2021 19:20:08: Total time for processing with 5000 of data and 7 workers = 228.20 s + +28-Nov-2021 19:25:33: Total time for processing with 5000 of data and 8 workers = 246.89 s + +28-Nov-2021 19:25:33: Processing Data of Size 7500 s + +28-Nov-2021 19:33:45: Total time for processing with 7500 of data sequentialy = 491.92 s + +28-Nov-2021 19:41:29: Total time for processing with 7500 of data and 1 workers = 440.19 s + +28-Nov-2021 19:47:14: Total time for processing with 7500 of data and 2 workers = 321.50 s + +28-Nov-2021 19:52:30: Total time for processing with 7500 of data and 3 workers = 286.45 s + +28-Nov-2021 19:58:27: Total time for processing with 7500 of data and 4 workers = 320.68 s + +28-Nov-2021 20:04:07: Total time for processing with 7500 of data and 5 workers = 298.34 s + +28-Nov-2021 20:59:16: Total time for processing with 7500 of data and 6 workers = 328.00 s + +28-Nov-2021 21:05:33: Total time for processing with 7500 of data and 7 workers = 315.33 s + +28-Nov-2021 21:10:29: Total time for processing with 7500 of data and 8 workers = 229.22 s + +28-Nov-2021 21:10:29: Processing Data of Size 10000 s + +28-Nov-2021 21:18:12: Total time for processing with 10000 of data sequentialy = 
441.28 s + +28-Nov-2021 22:34:24: Total time for processing with 10000 of data and 1 workers = 758.85 s + +28-Nov-2021 22:44:33: Total time for processing with 10000 of data and 2 workers = 301.13 s + +28-Nov-2021 22:49:10: Total time for processing with 10000 of data and 3 workers = 251.76 s + +28-Nov-2021 23:23:07: Total time for processing with 10000 of data and 4 workers = 268.16 s + +28-Nov-2021 23:27:42: Total time for processing with 10000 of data and 5 workers = 241.76 s + +28-Nov-2021 23:32:56: Total time for processing with 10000 of data and 6 workers = 264.10 s + +29-Nov-2021 00:09:38: Total time for processing with 10000 of data and 7 workers = 408.06 s + +29-Nov-2021 00:14:38: Total time for processing with 10000 of data and 8 workers = 255.50 s + +29-Nov-2021 00:27:42: Processing Data of Size 8000 s + +29-Nov-2021 00:28:20: Processing Data of Size 8000 s + +29-Nov-2021 00:34:10: Total time for processing with 8000 of data sequentialy = 349.83 s + +29-Nov-2021 01:10:47: Total time for processing with 8000 of data and 1 workers = 369.27 s + +29-Nov-2021 01:18:55: Total time for processing with 8000 of data and 2 workers = 442.25 s + +29-Nov-2021 01:25:38: Total time for processing with 8000 of data and 3 workers = 364.73 s + +29-Nov-2021 01:31:48: Total time for processing with 8000 of data and 4 workers = 324.23 s + +29-Nov-2021 01:37:50: Total time for processing with 8000 of data and 5 workers = 307.69 s + +29-Nov-2021 01:43:43: Total time for processing with 8000 of data and 6 workers = 292.73 s + +29-Nov-2021 01:51:03: Total time for processing with 8000 of data and 7 workers = 370.02 s + +29-Nov-2021 01:58:59: Total time for processing with 8000 of data and 8 workers = 380.90 s + +29-Nov-2021 01:58:59: Processing Data of Size 10000 s + +29-Nov-2021 02:10:12: Total time for processing with 10000 of data sequentialy = 673.28 s + +29-Nov-2021 02:39:32: Total time for processing with 10000 of data and 1 workers = 690.93 s + +29-Nov-2021 02:49:26: Total time for processing with 10000 of data and 2 workers = 561.70 s + +29-Nov-2021 02:56:52: Total time for processing with 10000 of data and 3 workers = 410.20 s + +29-Nov-2021 03:03:16: Total time for processing with 10000 of data and 4 workers = 347.37 s + +29-Nov-2021 03:09:57: Total time for processing with 10000 of data and 5 workers = 354.03 s + +29-Nov-2021 03:18:31: Total time for processing with 10000 of data and 6 workers = 453.07 s + +29-Nov-2021 03:26:29: Total time for processing with 10000 of data and 7 workers = 407.58 s + +29-Nov-2021 03:35:18: Total time for processing with 10000 of data and 8 workers = 446.84 s + +29-Nov-2021 03:35:18: Processing Data of Size 12000 s + +29-Nov-2021 04:05:19: Total time for processing with 12000 of data sequentialy = 855.34 s + +29-Nov-2021 04:20:32: Total time for processing with 12000 of data and 1 workers = 885.07 s + +29-Nov-2021 04:29:24: Total time for processing with 12000 of data and 2 workers = 503.77 s + +29-Nov-2021 05:12:38: Total time for processing with 12000 of data and 3 workers = 531.33 s + +29-Nov-2021 05:22:17: Total time for processing with 12000 of data and 4 workers = 538.84 s + +29-Nov-2021 05:31:22: Total time for processing with 12000 of data and 5 workers = 493.83 s + +29-Nov-2021 05:39:21: Total time for processing with 12000 of data and 6 workers = 425.42 s + +29-Nov-2021 05:47:59: Total time for processing with 12000 of data and 7 workers = 446.15 s + +29-Nov-2021 05:57:44: Total time for processing with 12000 of data and 8 workers = 518.11 s diff 
--git a/Assessment/Screen Shot 2021-12-06 at 4.03.00 PM.png b/Assessment/Screen Shot 2021-12-06 at 4.03.00 PM.png new file mode 100644 index 0000000..0ee614f Binary files /dev/null and b/Assessment/Screen Shot 2021-12-06 at 4.03.00 PM.png differ diff --git a/Assessment/Screen Shot 2021-12-06 at 4.03.27 PM.png b/Assessment/Screen Shot 2021-12-06 at 4.03.27 PM.png new file mode 100644 index 0000000..d58ad38 Binary files /dev/null and b/Assessment/Screen Shot 2021-12-06 at 4.03.27 PM.png differ diff --git a/Assessment/Screen Shot 2021-12-06 at 4.28.33 PM.png b/Assessment/Screen Shot 2021-12-06 at 4.28.33 PM.png new file mode 100644 index 0000000..8b66e15 Binary files /dev/null and b/Assessment/Screen Shot 2021-12-06 at 4.28.33 PM.png differ diff --git a/Assessment/Sequential Process Flowchart.jpg b/Assessment/Sequential Process Flowchart.jpg new file mode 100644 index 0000000..4cc2b27 Binary files /dev/null and b/Assessment/Sequential Process Flowchart.jpg differ diff --git a/Assessment/SequentialAutomation.1.m b/Assessment/SequentialAutomation.1.m new file mode 100644 index 0000000..b0da562 --- /dev/null +++ b/Assessment/SequentialAutomation.1.m @@ -0,0 +1,86 @@ +%% This script allows you to open and explore the data in a *.nc file +clear all +close all + +FileName = '/Users/youssef/Desktop/5011CEM Resit/NC Files/o3_surface_20180701000000.nc'; + +Contents = ncinfo(FileName); + +Lat = ncread(FileName, 'lat'); % load the latitude locations +Lon = ncread(FileName, 'lon'); % loadthe longitude locations + +%% Processing parameters provided by customer +RadLat = 30.2016; % cluster radius value for latitude +RadLon = 24.8032; % cluster radius value for longitude +RadO3 = 4.2653986e-08; % cluster radius value for the ozone data + +%% Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'sequential processing' will process the data location one +% after the other, reporting on the time involved. + +StartLat = 1; % latitude location to start laoding +NumLat = 400; % number of latitude locations ot load +StartLon = 1; % longitude location to start loading +NumLon = 700; % number of longitude locations ot load +tic +for NumHour = 1:25 % loop through each hour + fprintf('Processing hour %i\n', NumHour) + DataLayer = 1; % which 'layer' of the array to load the model data into + for idx = [1, 2, 4, 5, 6, 7, 8] % model data to load + % load the model data + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, NumHour], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; % step to the next 'layer' + end + + % We need to prepare our data for processing. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + DataSizes = [10, 20, 30]; + + for idxx = 1:length(DataSizes) + %% Sequential analysis + t1 = toc; + t2 = t1; + LoopParameter = DataSizes(idxx); + for idx = 1:LoopParameter % step through each data location to process the data + fprintf('Looping over datasize %i\n',LoopParameter); + % The analysis of the data creates an 'ensemble value' for each + % location. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. 
+ [EnsembleVector(idx, NumHour)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + end + T2(NumHour) = toc - t1; % record the total processing time for this hour + fprintf('Processing hour %i - %.2f s\n\n', NumHour, sum(T2)); + + + end + tSeq = toc; + Results(idx,:) = [DataSizes(idxx),tSeq]; + fprintf('Total time for sequential processing = %.2f s\n\n', tSeq) + +end + + + + + + + + + + + + + + + + + + + + + diff --git a/Assessment/SequentialAutomation.asv b/Assessment/SequentialAutomation.asv new file mode 100644 index 0000000..21cd692 --- /dev/null +++ b/Assessment/SequentialAutomation.asv @@ -0,0 +1,56 @@ +function tSeq = SequentialAutomation(DataSize, FileName) +%% This script allows you to open and explore the data in a *.nc file +close all +Contents = ncinfo(FileName); +Lat = ncread(FileName, 'lat'); % load the latitude locations +Lon = ncread(FileName, 'lon'); % loadthe longitude locations + +%% Processing parameters provided by customer + +RadLat = 30.2016; % cluster radius value for latitude +RadLon = 24.8032; % cluster radius value for longitude +RadO3 = 4.2653986e-08; % cluster radius value for the ozone data + +%% Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'sequential processing' will process the data location one +% after the other, reporting on the time involved. +StartLat = 1; % latitude location to start laoding +NumLat = 400; % number of latitude locations ot load +StartLon = 1; % longitude location to start loading +NumLon = 700; % number of longitude locations ot load +tic +for NumHour = 1:3 % loop through each hour + fprintf('Processing hour %i\n', NumHour) + DataLayer = 1; % which 'layer' of the array to load the model data into + for idx = [1, 2, 4, 5, 6, 7, 8] % model data to load + % load the model data + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, NumHour], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; % step to the next 'layer' + end + + % We need to prepare our data for processing. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + %% Sequential analysis + t1 = toc; + t2 = t1; + + for idx = 1: DataSize % step through each data location to process the data + + % The analysis of the data creates an 'ensemble value' for each + % location. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. 
+ [EnsembleVector(idx, NumHour)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + end + T2(NumHour) = toc - t1; % record the total processing time for this hour + fprintf('Processing hour %i - %.2f s\n\n', NumHour, sum(T2)); + +end +tSeq = toc; +fprintf('\nTotal time for sequential processing = %.2f s\n\n', tSeq) +end \ No newline at end of file diff --git a/Assessment/SequentialAutomation.m b/Assessment/SequentialAutomation.m new file mode 100644 index 0000000..ee8e7f8 --- /dev/null +++ b/Assessment/SequentialAutomation.m @@ -0,0 +1,50 @@ +function tSeq = SequentialAutomation(DataSize, FileName, Contents, Lat, Lon) +%% Processing parameters provided by customer + +RadLat = 30.2016; % cluster radius value for latitude +RadLon = 24.8032; % cluster radius value for longitude +RadO3 = 4.2653986e-08; % cluster radius value for the ozone data + +%% Cycle through the hours and load all the models for each hour and record memory use +% We use an index named 'NumHour' in our loop +% The section 'sequential processing' will process the data location one +% after the other, reporting on the time involved. +StartLat = 1; % latitude location to start loading +NumLat = 400; % number of latitude locations to load +StartLon = 1; % longitude location to start loading +NumLon = 700; % number of longitude locations to load +tic +for NumHour = 1:3 % loop through each hour + fprintf('Processing hour %i\n', NumHour) + DataLayer = 1; % which 'layer' of the array to load the model data into + for idx = [1, 2, 4, 5, 6, 7, 8] % model data to load + % load the model data + HourlyData(DataLayer,:,:) = ncread(FileName, Contents.Variables(idx).Name,... + [StartLon, StartLat, NumHour], [NumLon, NumLat, 1]); + DataLayer = DataLayer + 1; % step to the next 'layer' + end + + % We need to prepare our data for processing. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [Data2Process, LatLon] = PrepareData(HourlyData, Lat, Lon); + + %% Sequential analysis + t1 = toc; + t2 = t1; + + for idx = 1: DataSize % step through each data location to process the data + + % The analysis of the data creates an 'ensemble value' for each + % location. This method is defined by + % our customer. You are not required to understand this method, but you + % can ask your module leader for more information if you wish. + [EnsembleVector(idx, NumHour)] = EnsembleValue(Data2Process(idx,:,:,:), LatLon, RadLat, RadLon, RadO3); + end + T2(NumHour) = toc - t1; % record the total processing time for this hour + fprintf('Processing hour %i - %.2f s\n\n', NumHour, sum(T2)); + +end +tSeq = toc; +fprintf('\nTotal time for sequential processing = %.2f s\n\n', tSeq) +end \ No newline at end of file diff --git a/Assessment/TextScriptTest.m b/Assessment/TextScriptTest.m new file mode 100644 index 0000000..7af736e --- /dev/null +++ b/Assessment/TextScriptTest.m @@ -0,0 +1,32 @@ +function [TextErrors] = TextScriptTest(FileName) +%TEXTSCRIPTTEST Check the NC file variables for text (NC_Char) data types +% Returns TextErrors = 1 if any text variables are found (0 otherwise) and logs the result to TextTestLog.txt +TextErrors = 0; +DataTypes = {'NC_Byte', 'NC_Char', 'NC_Short', 'NC_Int', 'NC_Float', 'NC_Double'}; +Contents = ncinfo(FileName); % Store the file content information in a variable. +FileID = netcdf.open(FileName,'NC_NOWRITE'); % open file read only and create handle +%LogFileName = 'TextTestLog.txt'; +LogID = fopen('TextTestLog.txt','w'); +fprintf(LogID, '%s: Looking for text data types in %s.. \n', datestr(now, 0), FileName); +for idx = 0:size(Contents.Variables,2)-1 % loop through each variable + % read data type for each variable and store + [~, datatype(idx+1), ~, ~] = netcdf.inqVar(FileID,idx); +end +%% display data types +DataInFile = DataTypes(datatype)'; +%% find character data types +FindText = strcmp('NC_Char', DataInFile); +%% print results +fprintf('Testing file: %s\n', FileName) +if any(FindText) + fprintf('Error, text variables present:\n') + TextErrors = 1; + fprintf(LogID, '%s: Text errors present!\n', datestr(now, 0)); +else + fprintf('All data is numeric, continue analysis.\n') + fprintf(LogID, '%s: All data is numeric, continue analysis\n', datestr(now, 0)); +end +fprintf(LogID, '%s\n', DataInFile{:}); +fclose(LogID); +end + diff --git a/Assessment/TextTestLog.txt b/Assessment/TextTestLog.txt new file mode 100644 index 0000000..c6822d8 --- /dev/null +++ b/Assessment/TextTestLog.txt @@ -0,0 +1,13 @@ +29-Nov-2021 20:35:03: Looking for NaN data in C:\Users\eldieby\Desktop\5011CEM Resit\NC Files\o3_surface_20180701000000.nc.. +29-Nov-2021 20:35:03: All data is numeric, continue analysis +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float +NC_Float
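
A possible follow-up to GraphAutomation.m, sketched below and not part of this commit: it derives speedup and parallel efficiency from the Processing_Times matrix that Main.m builds (column 1 holds the sequential time, columns 2 to 9 the times for 1 to 8 workers). The function name PlotSpeedup and the output file names are illustrative only.

function PlotSpeedup(dataSize, Processing_Times)
% PLOTSPEEDUP Sketch (assumed helper, not in the repository): plot speedup and
% efficiency from the timing matrix produced by Main.m, where column 1 holds
% the sequential time and the remaining columns the times for 1..N workers.
Workers = 1:size(Processing_Times, 2) - 1;      % worker counts used for the parallel runs
SeqTime = Processing_Times(:, 1);               % sequential baseline, one row per data size
ParTimes = Processing_Times(:, 2:end);          % parallel times, one column per worker count
Speedup = SeqTime ./ ParTimes;                  % speedup = T_sequential / T_parallel
Efficiency = Speedup ./ Workers;                % efficiency = speedup / number of workers

figure('Name','Parallel Speedup','NumberTitle','off');
plot(Workers, Speedup, '-o')                    % one line per data size (rows of Speedup)
xlabel('Number of Processors')
ylabel('Speedup (T_{seq} / T_{par})')
title('Speedup vs number of processors')
legend(arrayfun(@num2str, dataSize, 'UniformOutput', false))
saveas(gcf,'Speedup.png')

figure('Name','Parallel Efficiency','NumberTitle','off');
plot(Workers, Efficiency, '-s')                 % efficiency near 1 means workers are fully used
xlabel('Number of Processors')
ylabel('Efficiency (Speedup / Workers)')
title('Efficiency vs number of processors')
legend(arrayfun(@num2str, dataSize, 'UniformOutput', false))
saveas(gcf,'Efficiency.png')
end

It could be called right after GraphAutomation in Main.m, e.g. PlotSpeedup(DataSizes, Processing_Times); note that Main.m rescales every element of Processing_Times from 3 hours to a 25-hour estimate before plotting, and that common factor cancels in the speedup ratio.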