Final Commit
Regular commits didn't work; will email Dr. Hyde with evidence of regular commits.
eldieby committed Dec 6, 2021
0 parents commit 0e2bed30d0a4d6c66a97af887ddd28e82b913604
Showing 32 changed files with 1,173 additions and 0 deletions.
@@ -0,0 +1,214 @@
function [ Clusters, Results ] = DDC_ver01_1_CAMS( varargin )
%DDC_VER01.1 Data Density Based Clustering
% Copyright R Hyde 2017
% Released under the GNU GPL v3.0
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
% If you use this file please acknowledge the author and cite as a
% reference:
% Hyde, R.; Angelov, P., "Data density based clustering," Computational
% Intelligence (UKCI), 2014 14th UK Workshop on , vol., no., pp.1,7, 8-10 Sept. 2014
% doi: 10.1109/UKCI.2014.6930157
% Downloadable from: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6930157&isnumber=6930143
%
% ACCMIP revision: for clusters with no distance in any dimension, try setting
% the grid dimension instead of leaving them as the initial radii (Line 132)
%
% Data Density Based Clustering with Manual Radii
% Usage:
% [Clusters, Results] = DDC_ver01_1_CAMS(DataIn, InitR, Merge, Verbose)
% Inputs:
% DataIn: m x n array of data for clustering, m rows of samples; data
% should be normalised 0-1 or scaled appropriately
% InitR: array of initial radii with a radius for each dimension; if a
% single value is provided the same value is used for all dimensions
% Merge: flag, 1 to merge clusters whose centre lies within the ellipse
% of another
% Verbose: 1 - plot output of progress, 0 - silent
% Outputs:
% Results: array of data with cluster number appended as last column
% Clusters: array of cluster centre co-ords and radii
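%
% Example (hypothetical call, assuming data already normalised to 0-1):
%   X = rand(500, 2);                                % 500 random 2-D samples
%   [Clusters, Results] = DDC_ver01_1_CAMS(X, 0.1, 1, 0);
%   % Results(:, end) holds the cluster label assigned to each sample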

%% Start Main Function
if size(varargin,2)<4
Clusters=[]; Results=[]; % assign empty outputs before the early return
fprintf('Input error - not enough inputs.\n')
return
end
DataIn=varargin{1}; % Read Array of Data
InitR=varargin{2}; % Read Initial Radius
Verbose=varargin{4}; % Read flag for plotting during the run
Merge=varargin{3}; % Read flag for merging final clusters
% defaults to use instead of initial radii, 1 grid in lat, lon, scaled O3 std dev in case of cluster radii = 0
% DefaultRadii = [getappdata(CatacombePaper,'GridDistance'),...
% getappdata(CatacombePaper, 'RadO3')];

%% Initialise
if size(InitR,2)==1 % create equal radii across each dimension if only one provided
if Verbose==1
fprintf('Using equal radii\n')
end
InitR=ones(size(DataIn,2),1)*InitR;
elseif size(InitR,2)<size(DataIn,2)
fprintf('Radii must be a single value or match the number of data dimensions.\n')
Clusters=[]; Results=[]; % assign empty outputs before the early return
return
end
NumClusters=0; % initial number of clusters
Results=zeros(size(DataIn,1),size(DataIn,2)+1); % initiate empty array

%% ### DDC routine ###
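% Each pass: locate the densest remaining point, grow an ellipsoidal cluster
% around it, refine the centre and radii, then remove the clustered samples;
% repeat until no data remain.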
Glob_Mean=mean(DataIn,1); % array of means of data dim
Glob_Scalar=sum(sum((DataIn.*DataIn),2),1)/size(DataIn,1); % array of scalar products for each data dim

while size(DataIn,1)>0
% size(DataIn,1) % uncomment to trace remaining data
NumClusters=NumClusters+1;
Clusters.Rad(NumClusters,:)=InitR;
%% Find Cluster Centre
Glob_Mean=mean(DataIn,1); % array of means of data dim
Glob_Scalar=sum(sum((DataIn.*DataIn),2),1)/size(DataIn,1); % array of scalar products for each data dim
% full calculations
% GDensity=1./(1+(pdist2(DataIn,Glob_Mean,'euclidean').^2)+Glob_Scalar-(sum(Glob_Mean.^2))); % calculate global densities
% [~, CentreIndex]=max(GDensity); % find index of max densest point
% slim calculations
GDensity=pdist2(DataIn,Glob_Mean,'euclidean').^2 + Glob_Scalar - sum(Glob_Mean.^2); % inverse density measure for each point
[~, CentreIndex]=min(GDensity); % index of the densest point (minimum of the inverse measure)

%% Find points belonging to cluster
Include=bsxfun(@minus,DataIn,DataIn(CentreIndex,:)).^2; % sum square of distances from centre
RadSq=Clusters.Rad(NumClusters,:).^2; % square radii
Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms
Include=find(Include<1);

%% Remove outliers >3sigma
Dist=pdist2(DataIn(Include,:),DataIn(CentreIndex,:)); % distances to all potential members
Include=Include(abs(Dist - mean(Dist)) <= 3*std(Dist),:); % keep only samples within 3 sigma of the mean distance

%% Move cluster centre to local densest point
LocMean=mean(DataIn(Include,:),1);
LocScalar=sum((DataIn(Include,:).^2),2)/size(Include,1); % array of scalar products of data dims
% full calculations
% LocDens=1./(1+(pdist2(DataIn(Include,:),LocMean,'euclidean').^2)+LocScalar-(sum(LocMean.^2))); % calculate local densities
% [~,CentreIndex]=max(LocDens);
% slim calculations
LocDens=pdist2(DataIn(Include,:),LocMean,'euclidean').^2 + LocScalar - sum(LocMean.^2); % local inverse density measure
[~,CentreIndex]=min(LocDens); % index of the locally densest point
CentreIndex=Include(CentreIndex);
Clusters.Centre(NumClusters,:)=DataIn(CentreIndex,:); % assign cluster centre

%% Assign data to new centre
Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre
RadSq=Clusters.Rad(NumClusters,:).^2; % square radii
Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms
Include=find(Include<1);

%% Remove outliers >3sigma
Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members
Include=Include(abs(Dist - mean(Dist)) <= 3*std(Dist),:); % keep only samples within 3 sigma of the mean distance

%% Update radii to maximum distances
for idx=1:size(DataIn,2)
value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean');
if max(value01)>0
Clusters.Rad(NumClusters,idx)=max(value01);
end
end

%% Assign data to cluster based on new radii
Include=bsxfun(@minus,DataIn,Clusters.Centre(NumClusters,:)).^2; % sum square of distances from centre
RadSq=Clusters.Rad(NumClusters,:).^2; % square radii
Include=sum(bsxfun(@rdivide,Include,RadSq),2); % divide by radii and add terms
Include=find(Include<1);

%% Remove outliers >3sigma
Dist=pdist2(Clusters.Centre(NumClusters,:),DataIn(Include,:)); % distances to all potential members
Include=Include(abs(Dist - mean(Dist)) <= 3*std(Dist),:); % keep only samples within 3 sigma of the mean distance

%% Update radii to maximum distances

for idx=1:size(DataIn,2)
value01=pdist2(DataIn(Include,idx),Clusters.Centre(NumClusters,idx),'Euclidean');
if max(value01)>0
Clusters.Rad(NumClusters,idx)=max(value01);
else
% Clusters.Rad(NumClusters,idx)=DefaultRadii(idx);
end
end

%% Plot
if Verbose==1
hold off;scatter(DataIn(:,1),DataIn(:,2));hold on
scatter(DataIn(CentreIndex,1),DataIn(CentreIndex,2),'r')
scatter(DataIn(Include,1),DataIn(Include,2),'g');
scatter(Clusters.Centre(NumClusters,1),Clusters.Centre(NumClusters,2),'*','r')
title(sprintf('Clustered: %i, Remaining: %i',size(Results,1)-size(DataIn,1), size(DataIn,1)))
axis([0 1 0 1])
drawnow
for zz=1:size(Clusters.Centre,1)
rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1), Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1), 2*Clusters.Rad(zz,2)],'Curvature',[1,1])
end
end
%% Assign data to final clusters
StartIdx=find(all(Results==0,2),1,'first');
EndIdx=StartIdx+size(Include,1)-1;
Results(StartIdx:EndIdx,:)=[DataIn(Include,:),ones(size(Include,1),1)*NumClusters];
DataIn(Include,:)=[]; % remove clustered data
end

%% Merge clusters if centre is within another cluster
if Merge==1
MergeAny=1;
while MergeAny==1
if Verbose==1
figure(2)
clf
for zz=1:size(Clusters.Centre,1)
rectangle('Position',[Clusters.Centre(zz,1)-Clusters.Rad(zz,1),...
Clusters.Centre(zz,2)-Clusters.Rad(zz,2), 2*Clusters.Rad(zz,1),...
2*Clusters.Rad(zz,2)],'Curvature',[1,1])
end
hold on
scatter(Clusters.Centre(:,1),Clusters.Centre(:,2),'*','r')
drawnow
end

MergeAny=0;
Merges=[];
% for each cluster & find if cluster centre is within other clusters
for idx1=1:size(Clusters.Centre,1)
InEll=bsxfun(@minus,Clusters.Centre,Clusters.Centre(idx1,1:end)).^2;
InEll=sum(bsxfun(@rdivide,InEll,Clusters.Rad(idx1,:).^2),2); % divide by rad^2 & add
InEll=(InEll<1);
Merges(idx1,:)=InEll.';
end
Merges(logical(eye(size(Merges))))=0;
% Merge clusters
for idx=1:size(Clusters.Centre,1)
[~,idx1]=find(Merges(idx,:),1);
Results(ismember(Results(:,end),idx1),end)=idx;
if idx1
MergeAny=1;
end
end
%% renumber clusters
[C,~,ic]=unique(Results(:,end));
C=1:size(C,1);
Results(:,end)=C(ic);
%% Re-create cluster data
Clusters.Centre=[];
Clusters.Rad=[];
for idx1=1:max(Results(:,end))
Clusters.Centre(idx1,:)=mean(Results(Results(:,end)==idx1,1:end-1),1);
for idx2=1:size(Results,2)-1
value01=pdist2(Results(Results(:,end)==idx1,idx2),Clusters.Centre(idx1,idx2),'Euclidean');
if max(value01)>0
Clusters.Rad(idx1,idx2)=max(value01);
else
Clusters.Rad(idx1,idx2)=0;
end
end
end

end
end

end % end function
@@ -0,0 +1,13 @@
function EV = EnsembleValue(Data, LatLon, RadLat, RadLon, RadO3)

%ENSEMBLEVALUE Cluster one hour of gridded model data together with its
% grid coordinates using DDC and return the centre value of the most
% populated cluster.
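%
% Example (hypothetical inputs and radii):
%   [LonGrid, LatGrid] = meshgrid(Lon, Lat);   % grids matching HourData
%   EV = EnsembleValue(HourData, [LatGrid(:), LonGrid(:)], 0.5, 0.5, 2);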


Data4Cluster = [Data(:),LatLon];
[Clusters, Results] = DDC_ver01_1_CAMS(Data4Cluster, [RadLat, RadLon, RadO3], 0, 0);
MostCommonCluster = mode(Results(:,end));
EV = Clusters.Centre(MostCommonCluster);

end

@@ -0,0 +1,33 @@
function [] = GraphAutomation(dataSize,x1Vals,y1Vals,x2Vals,y2Vals,x3Vals,y3Vals)
%GRAPHAUTOMATION Plot total and mean processing times against the number
% of processors for three data sizes and save the figures as PNG files.

figure('Name','Processing Times per Number of Processors','NumberTitle','off');
yyaxis left
plot(x1Vals, y1Vals, '-bd')
hold on
ylabel('Processing time (s)')
yyaxis right
plot(x2Vals, y2Vals, '-rx')
hold on
plot(x3Vals, y3Vals, '-gs')
xlabel('Number of Processors')
ylabel('Processing time (s)')
title('Processing time vs number of processors')
legend(num2str(dataSize(1)), num2str(dataSize(2)), num2str(dataSize(3)))
saveas(gcf,'Processing Times.png')

%% Mean processing time per data item
y1MeanVals = y1Vals / dataSize(1);
y2MeanVals = y2Vals / dataSize(2);
y3MeanVals = y3Vals / dataSize(3);
figure('Name','Mean Processing Times per Number of Processors','NumberTitle','off');
plot(x1Vals, y1MeanVals, '-bd')
hold on
plot(x2Vals, y2MeanVals, '-rx')
hold on
plot(x3Vals, y3MeanVals, '-gs')
xlabel('Number of Processors')
ylabel('Mean processing time per data item (s)')
title('Mean Processing time vs number of processors')
legend(num2str(dataSize(1)), num2str(dataSize(2)), num2str(dataSize(3)))
saveas(gcf,'Mean Processing Times.png')
end
@@ -0,0 +1,28 @@
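% Driver script: validate the NetCDF data file with TextScriptTest, then time
% SequentialAutomation and ParallelAutomation (1-8 workers) for each data
% size, log the results to ProcessingResults.txt, and plot them with
% GraphAutomation.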
addpath '/Users/youssef/Desktop/5011CEM Resit/Common Files'
FileName = '/Users/youssef/Desktop/5011CEM Resit/NC Files/o3_surface_20180701000000.nc'; %%%Insert File Name to be ran
DataSizes = [8000, 10000, 12000]; %%%Insert the amount of data you would like to process from the model per hour
LogFileName = 'ProcessingResults.txt';
LogID = fopen(LogFileName, 'a');
Processing_Times = [];
Processors = [0, 1, 2, 3, 4, 5, 6, 7, 8]; % 0 represents the sequential run
if TextScriptTest(FileName) == 1
fprintf('Data file contains text errors: refer to TextTestLog.txt for more information\n')
return
else
fprintf('Tests successful!\nProcessing initiated\n')
for idx = 1:length(DataSizes)
fprintf(LogID, '%s: Processing data of size %i\n\n', datestr(now, 0), DataSizes(idx));
Processing_Times(idx, 1) = SequentialAutomation(DataSizes(idx),FileName);
fprintf(LogID, '%s: Total time for processing %i data points sequentially = %.2f s\n\n', datestr(now, 0), DataSizes(idx), Processing_Times(idx, 1));
for workers = 1:8
Processing_Times(idx, workers+1) = ParallelAutomation(workers, DataSizes(idx),FileName);
fprintf(LogID, '%s: Total time for processing %i data points with %i workers = %.2f s\n\n', datestr(now, 0), DataSizes(idx), workers, Processing_Times(idx, workers+1));
end
end
% Scale the measured times by 25/3 (assumption: 3 hours processed per run, 25 hourly steps in the full file)
Processing_Times = (Processing_Times / 3) * 25;
GraphAutomation(DataSizes, Processors, Processing_Times(1, :), Processors, Processing_Times(2, :), Processors, Processing_Times(3, :));
fprintf('Processing successful, please refer to ProcessingResults.txt for more information\n')
end
fclose(LogID);
@@ -0,0 +1,31 @@
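% Driver script (small test data sizes): validate the NetCDF data file, preload
% its metadata and coordinate vectors, then time SequentialAutomation and
% ParallelAutomation (1-8 workers) for each data size, log the results to
% ProcessingResults.txt, and plot them with GraphAutomation.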
addpath '/Users/youssef/Desktop/5011CEM Resit/Common Files'
FileName = '/Users/youssef/Desktop/5011CEM Resit/NC Files/o3_surface_20180701000000.nc'; %%%Insert File Name to be ran
DataSizes = [10, 20, 30]; %%%Insert the amount of data you would like to process from the model per hour
LogFileName = 'ProcessingResults.txt';
LogID = fopen(LogFileName, 'a');
Processing_Times = [];
Processors = [0, 1, 2, 3, 4, 5, 6, 7, 8]; % 0 represents the sequential run
Contents = ncinfo(FileName);
Lat = ncread(FileName, 'lat'); % load the latitude locations
Lon = ncread(FileName, 'lon'); % load the longitude locations
if TextScriptTest(FileName) == 1
fprintf('Data file contains text errors: refer to TextTestLog.txt for more information\n')
return
else
fprintf('Tests successful!\nProcessing initiated\n')
for idx = 1:length(DataSizes)
fprintf(LogID, '%s: Processing data of size %i\n\n', datestr(now, 0), DataSizes(idx));
Processing_Times(idx, 1) = SequentialAutomation(DataSizes(idx),FileName, Contents, Lat, Lon);
fprintf(LogID, '%s: Total time for processing %i data points sequentially = %.2f s\n\n', datestr(now, 0), DataSizes(idx), Processing_Times(idx, 1));
for workers = 1:8
Processing_Times(idx, workers+1) = ParallelAutomation(workers, DataSizes(idx),FileName, Contents, Lat, Lon);
fprintf(LogID, '%s: Total time for processing %i data points with %i workers = %.2f s\n\n', datestr(now, 0), DataSizes(idx), workers, Processing_Times(idx, workers+1));
end
end
% Scale the measured times by 25/3 (assumption: 3 hours processed per run, 25 hourly steps in the full file)
Processing_Times = (Processing_Times / 3) * 25;
GraphAutomation(DataSizes, Processors, Processing_Times(1, :), Processors, Processing_Times(2, :), Processors, Processing_Times(3, :));
fprintf('Processing successful, please refer to ProcessingResults.txt for more information\n')
end
fclose(LogID);
@@ -0,0 +1,39 @@
function [NaNErrors] = NaNScriptTest(FileName)
%NANSCRIPTTEST Check every hourly step of every model in the NetCDF file
% for NaN values; results are printed and logged to NanTestLog.txt.
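%
% Example (file name taken from the processing scripts):
%   NaNErrors = NaNScriptTest('o3_surface_20180701000000.nc');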
%% Test File with Errors
NaNErrors = 0;
%% Set file to test;
Contents = ncinfo(FileName); % Store the file content information in a variable.
LogFileName = 'NanTestLog.txt';
LogID = fopen(LogFileName, 'w');
fprintf(LogID, '%s: Looking for NaN data in %s.. \n', datestr(now, 0), FileName);
StartLat = 1;
StartLon = 1;

fprintf('Testing file: %s\n', FileName)
for idxHour = 1:25

for idxModel = 1:8
Data(idxModel,:,:) = ncread(FileName, Contents.Variables(idxModel).Name,...
[StartLat, StartLon, idxHour], [inf, inf, 1]);
end

% check for NaNs
if any(isnan(Data), 'all')
fprintf('NaNs present during hour %i\n', idxHour)
NaNErrors = 1;
end
end
fprintf('Testing for NaN errors in file: %s\n', FileName)
fprintf(LogID, 'Testing file: %s\n', FileName);
if NaNErrors
fprintf('NaN errors present!\n')
fprintf(LogID, '%s: NaN errors present!\n', datestr(now, 0));
else
fprintf('No errors!\n')
fprintf(LogID, '%s: No errors!\n', datestr(now, 0));
end
fclose(LogID);
end

