diff --git a/GetTfromP.m b/GetTfromP.m
new file mode 100644
index 0000000..dbc9b11
--- /dev/null
+++ b/GetTfromP.m
@@ -0,0 +1,74 @@
+function [TS_T_ann_interim, extra_c] = GetTfromP(TS_P_ann_interim, HistoricData, info)
+
+ % created 08/10/2019 by Keirnan Fowler, University of Melbourne
+
+ % Creates a synthetic annual timeseries of temperature (T) for each
+ % region based on (a) the synthetic timeseries of precipitation P;
+ % (b) the historic relationship between T and P; and (c) a
+ % timeseries of serially correlated random numbers* used to generate
+ % residuals so that the result does not *exactly* follow the line of
+ % best fit from (b)
+
+ % * note that only a single timeseries of residuals is generated, since
+ % it is assumed that the physical processes leading to the anomolies
+ % act across all regions (more or less).
+
+ % first, generate the timeseries of random numbers for use across all regions
+ RandomNumbers = rand(info.pars.StochRepLen_yrs, 1); % note, serial correlation comes later
+
+ % initialise arrays
+ all.T_synth = []; all.resid = []; all.resid_std = []; all.m = []; all.c = [];
+
+ % for each subarea
+ SubareaList = info.SubareaList';
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ % generate a relationship between historic P and T
+ P = HistoricData.precip_annual.(subarea{:});
+ T = HistoricData.T_annual.(subarea{:});
+ [m, c] = LinReg_get_m_and_c(P, T);
+
+ % generate a timeseries of model error and calculate stats
+ T_mod = m * P + c;
+ resid = T_mod - T;
+ resid_std = std(resid);
+ resid_autocorr = kf_autocorr(resid); % disp(['The timeseries of residuals for ' region{1} ' exhibits an autocorrelation of ' num2str(resid_autocorr)])
+
+ % generate synthetic rainfall values that perfectly honour the line of best fit
+ P_synth = TS_P_ann_interim.(subarea{:});
+ T_synth = m * P_synth + c;
+
+ % subject sythetic values to serially correlated random deviations
+ Zstd = norminv(RandomNumbers);
+ RndDev(1) = 0;
+ for iYear = 2:info.pars.StochRepLen_yrs
+ AutoCorr = 0.32; % note, a lag-1 autocorrelation of 0.32 is the average value across the 7 regions (see line 31)
+ RndDev(iYear) = RndDev(iYear-1)*AutoCorr + resid_std*Zstd(iYear);
+ end
+
+ % add modelled values to deviates
+ T_synth = T_synth + RndDev';
+
+ % append variables to arrays
+ all.T_synth(:, end+1) = T_synth; all.resid(:, end+1) = resid; all.resid_std(:, end+1) = resid_std; all.m(:, end+1) = m; all.c(:, end+1) = c;
+
+ end
+
+ % finalise outputs
+ TS_T_ann_interim = array2table(all.T_synth, 'VariableNames', info.SubareaList);
+ extra_c.resid = array2table(all.resid, 'VariableNames', info.SubareaList);
+ extra_c.resid_std = array2table(all.resid_std, 'VariableNames', info.SubareaList);
+ extra_c.m = array2table(all.m, 'VariableNames', info.SubareaList);
+ extra_c.c = array2table(all.c, 'VariableNames', info.SubareaList);
+ extra_c.RandomNumbers = RandomNumbers;
+end
+
+function autocorr = kf_autocorr(TS)
+ temp = corrcoef(TS(1:(end-1)), TS(2:end));
+ autocorr = temp(1, 2);
+end
+
+function [m, c] = LinReg_get_m_and_c(x, y)
+ P = polyfit(x,y,1);
+ m = P(1); c = P(2);
+end
\ No newline at end of file
diff --git a/LowFreq_PreAnalysis.m b/LowFreq_PreAnalysis.m
new file mode 100644
index 0000000..81c92e0
--- /dev/null
+++ b/LowFreq_PreAnalysis.m
@@ -0,0 +1,543 @@
+function [LowFreq_PreAnalysis_Outputs, extra] = LowFreq_PreAnalysis(HistoricData, info)
+
+ % created February 2021 by Keirnan Fowler, University of Melbourne
+
+ % NOTES ABOUT STOCHASTIC GENERATION FOR THE BASE CASE (NO PERTURBATION)
+ % 1. This framework explicity separates the annual precipitation into
+ % high frequency and low frequency components, and stochastically
+ % generates each component separately.
+ % 2. The model used for the low frequency component is called the
+ % 'Broken Line Process'. It has two parameters that need to be
+ % decided: the timescale parameter and the amplitude parameter.
+ % 3. To ensure all subareas synchronously go into and out of multiyear
+ % wet and dry periods, all subareas need to have the same timescale
+ % parameter.
+ % 4. For the base case, the timescale parameter is decided based on
+ % the number of zero crossings in the combined historic low
+ % frequency series (ie. combined across subareas). The formula
+ % used is in Supplementary Material ##.
+ % 5. The amplitude parameter is then decided for each subarea
+ % individually. The basis for deciding is that the Historic Hurst
+ % exponent value for that subarea must be preserved in the final
+ % stochastic data (high+low combined).
+
+
+ % NOTES ABOUT PERTURBATION
+ % 6. As part of the stress test, the low frequency component can be
+ % perturbed.
+ % 7. Perturbations in low frequency component are quantified as a
+ % change in Hurst exponent.
+ % 8. The change in Hurst exponent is acheived solely by changing the
+ % parameters of the Broken Line process - no change to high
+ % frequency stochastic generation is made.
+ % 9. An increase in Hurst can be achieved by changing the low frequency
+ % component so that it has a longer period, or higher amplitude
+ % oscillations, or both. Here, we opt for both.
+ % 10. To understand how this is acheived, imagine the following (for a
+ % given subarea): a 2D plot, x = timescale parameter; y = amplitude
+ % parameter. The base case parameter
+ % values from 4. and 5. plot as a point in this space. For a given
+ % Hurst value, we can connect all the points in the space that
+ % result in that value (post being recombined with the high
+ % frequency component). For a higher Hurst value, the isoline will
+ % be up and to the right (longer period and/or higher amplitude).
+ % 11. During perturbation, the parameters are changed so that the
+ % direction of change is perpendicular to the isolines (or as
+ % close to this as possible given the restrictions in 12). As per
+ % 10, this means the direction will be up and to the right, or put
+ % another way it means the angle will be between 0 and 90 degrees.
+ % 12. As per 3, all regions need to have the same timescale parameter.
+ % The value of this parameter will change with perturbation of
+ % Hurst, but the key point is that for a given perturbation all
+ % subareas need to have the same timescale parameter value. As per
+ % 5, this does not apply to the amplitude parameter. In fact, the
+ % amplitude parameter can change to ensure the desired (perturbed)
+ % Hurst value is obtained in each subarea.
+
+ % THIS CODE
+ % This code is intended as a pre-analysis to be undertaken prior to
+ % stochastic data generation. It is faster to do the routines herein
+ % once only at the start, rather than repeating them for every possible
+ % perturbation.
+
+ % OUTPUT OF THIS CODE
+ % Main output is a set of tables (one for each subarea) where the columns are:
+ % - perturbation
+ % - resulting Hurst value
+ % - timescale parameter value
+ % - amplitude parameter value
+ % The idea is to trial many different perturbation values and then at
+ % run time the required parameter values can be read off the table, or
+ % interpolated if the exact perturbation value has not been tested.
+
+ % STEPS UNDERTAKEN IN THIS CODE
+ % (a) For each subarea, calculate the historic Hurst value.
+ % (b) Calculate the base case timescale parameter as per 4.
+ % (c) For each subarea, stochastically generate the high frequency
+ % component in exactly the same manner as happens at run time.
+ % (d) For each subarea, calculate the base case amplitude parameter as per 5.
+ % (e) For each subarea, test every angle between 1 degree and 89 degrees
+ % (see point 11), in increments of 1 degree. The test involves
+ % moving a set distance (pars.test_distance) in the specified
+ % direction and recording the change in Hurst.
+ % (f) Decide on a direction (to be applied across all subareas) that
+ % gives the greatest change in Hurst value, on average, across all
+ % the subareas.
+ % (g) Calculate the gradient [?Hurst / dist. in 2D space] for each
+ % subarea separately and then take the average across all subareas.
+ % (h) Use the gradient to populate the third column of the tables (ie.
+ % for each perturbation, assign a timescale parameter)
+ % (i) For each perturbation, given the timescale parameter, solve for
+ % the exact value of the amplitude parameter that will give the
+ % desired (perturbed) Hurst value. Do this separately for each
+ % subarea.
+
+ % set perturbations to test (defined as change in Hurst value)
+ % These don't neccesarily have to correspond to the adopted perturbations -
+ % if not, interpolation is used.
+ output_table.perturbation = [-0.03 -0.015 0 +0.015 +0.03 +0.045 +0.06 +0.075];
+
+ %% (a) For each subarea, calculate the historic Hurst value.
+ HistoricHurst = GetHistoricHurst(HistoricData, info);
+
+ %% (b) Calculate the base case timescale parameter as per 4.
+ param_timescale_hist = GetParamTimescaleHist(HistoricData, info);
+
+ %% (c) For each subarea, stochastically generate the high frequency
+ % component in exactly the same manner as happens at run time.
+ TS_P_ann_HighFreq = GetP_Matalas(HistoricData, info);
+
+ %% (d) For each subarea, calculate the base case amplitude parameter as per 5.
+ disp('Starting base case parameter calculations...')
+ [param_amplitude_hist, TS_rand] = GetParamAmp_unpert(HistoricData, param_timescale_hist, info, TS_P_ann_HighFreq, HistoricHurst);
+
+ %% (e) For each subarea, test every angle between 1 degree and 89 degrees
+ % (see point 11), in increments of 1 degree. The test involves
+ % moving a set distance (pars.test_distance) in the specified
+ % direction and recording the change in Hurst.
+ disp('Done. Starting directional tests for Broken Line parameters...')
+ info.pars.test_distance = 0.1; % one tenth of a unit
+ DirectionalTestResults = DirectionalTest(HistoricData, param_timescale_hist, info, TS_P_ann_HighFreq, HistoricHurst, param_amplitude_hist, TS_rand);
+
+ %% (f) Decide on a direction (to be applied across all subareas) that
+ % gives the greatest change in Hurst value, on average, across all
+ % the subareas.
+ % and
+ % (g) Calculate the gradient [?Hurst / dist. in 2D space] for each
+ % subarea separately and then take the average across all subareas.
+ [upslope_angle_deg, grad] = FinaliseDirection(DirectionalTestResults, info);
+
+ %% (h) Use the gradient and angle to populate the second column of the tables (ie.
+ % for each perturbation, assign a timescale parameter)
+ output_table.timescale = GetTimescale(output_table.perturbation, grad, upslope_angle_deg, param_timescale_hist);
+
+ %% (i) For each perturbation, given the timescale parameter, solve for
+ % the exact value of the amplitude parameter that will give the
+ % desired (perturbed) Hurst value. Do this separately for each
+ % subarea.
+ disp('Done. Finalising Broken Line parameters...')
+ [output_table.amplitude, output_table.Hurst] = GetAmplitude(HistoricData, info, TS_P_ann_HighFreq, HistoricHurst, TS_rand, output_table.timescale, output_table.perturbation);
+
+ %% create final output
+ LowFreq_PreAnalysis_Outputs = CreateFinalOutput(output_table, TS_rand, info);
+ extra = CreateExtraOutput(info, DirectionalTestResults, upslope_angle_deg, grad);
+
+end
+
+%% functions
+
+function HistoricHurst = GetHistoricHurst(HistoricData, info)
+ SubareaList = info.SubareaList'; HistoricHurst = [];
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+ ThisHistHurst = GetHurst(HistoricData.precip_annual.(subarea{:}), false);
+ HistoricHurst(end+1) = ThisHistHurst;
+ end
+
+ % save as table
+ subarea = SubareaList'; HistoricHurst = HistoricHurst';
+ HistoricHurst = table(subarea, HistoricHurst);
+end
+
+function param_timescale_hist = GetParamTimescaleHist(HistoricData, info)
+ TimeseriesLength = size(HistoricData.precip_ann_LowFreq.adopted_combined, 1);
+ NumZeroCrossings = GetNumZeroCrossings(HistoricData.precip_ann_LowFreq.adopted_combined);
+ param_timescale_hist = TimeseriesLength / (2*NumZeroCrossings + 1); % see Supplementary Material ##
+end
+
+function [param_amplitude_hist, TS_rand] = GetParamAmp_unpert(HistoricData, param_timescale_hist, info, TS_P_ann_HighFreq, HistoricHurst);
+
+ % generate TS_rand, a single timeseries of random deviations for use across all regions.
+ n = size(TS_P_ann_HighFreq, 1); TS_rand = rand(n+1, 1);
+
+ SubareaList = info.SubareaList'; param_amplitude_hist = [];
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ pos = strcmp(HistoricHurst.subarea, subarea{:});
+ ThisHistHurst = HistoricHurst.HistoricHurst(pos);
+
+ % do a search to find the best param_amplitude_hist so that the
+ % historic Hurst value is matched
+ fun = @(param_amplitude) abs(ThisHistHurst - GetStochHurstGivenPars(param_timescale_hist, param_amplitude, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea));
+ initial_val = 100;
+ options = optimset('TolFun',1e-3, 'TolX', 1e-2);
+ param_amplitude_hist(end+1) = fminsearch(fun, initial_val, options);
+
+ end
+
+ % save as table
+ subarea = SubareaList'; param_amplitude_hist = param_amplitude_hist';
+ param_amplitude_hist = table(subarea, param_amplitude_hist);
+
+end
+
+function DirectionalTestResults = DirectionalTest(HistoricData, param_timescale_hist, info, TS_P_ann_HighFreq, HistoricHurst, param_amplitude_hist, TS_rand)
+
+ % Notes:
+ % The objective is to find the angle (direction in parameter space) that
+ % gives the greatest increase in Hurst. While fminsearch would have
+ % been a good option here, the objective function surface is often
+ % irregular** and thus may be multi-modal, so we use a more manual
+ % approach: We manually test every angle between 1 and 89 degrees, in
+ % 1 degree increments.
+
+ % ** I think because higher values of param_timescale_hist incorporate
+ % progressively less Broken Line segments in the stochastic timeseries,
+ % and the removal of each segment has a unique affect on Hurst.
+
+ SubareaList = info.SubareaList';
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ pos = strcmp(HistoricHurst.subarea, subarea{:});
+ ThisHistHurst = HistoricHurst.HistoricHurst(pos);
+ this_param_amplitude_hist = param_amplitude_hist.param_amplitude_hist(pos);
+
+ AnglesToTest = 1:89; % anywhere between 1 degree and 89 degrees, in one-degree increments
+ for i = 1:size(AnglesToTest, 2)
+ angle = AnglesToTest(i);
+ [HurstChange(i), NewHurst(i)] = GetHurstChangeForSlope(angle, param_timescale_hist, this_param_amplitude_hist, ThisHistHurst, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea);
+ end
+ DirectionalTestResults.HurstChange.(subarea{:}) = HurstChange;
+ DirectionalTestResults.NewHurst.(subarea{:}) = NewHurst;
+ disp(['Done for subarea ' subarea{:}])
+ end
+
+end
+
+function [upslope_angle_deg, grad] = FinaliseDirection(DirectionalTestResults, info)
+
+ plotting = false; % make true if you want to see a plot
+
+ SubareaList = info.SubareaList'; array_AllSubareas = [];
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+ array_AllSubareas(1:89, end+1) = DirectionalTestResults.HurstChange.(subarea{:});
+ if plotting, plot(DirectionalTestResults.HurstChange.(subarea{:}), '-o'); hold on; end
+ end
+ mean_AllSubareas = mean(array_AllSubareas, 2);
+ [delta_Hurst, upslope_angle_deg] = max(mean_AllSubareas);
+ if plotting, plot(mean_AllSubareas, 'LineWidth', 5); end
+
+ % delta_Hurst is the average increase in Hurst over pars.test_distance.
+ % However, we need the gradient, which is the change over a single unit of distance.
+ grad = delta_Hurst / info.pars.test_distance;
+
+end
+
+function timescale_param = GetTimescale(list_of_perturbations, grad, upslope_angle_deg, param_timescale_hist)
+
+ % Use the gradient to assign a timescale parameter for each perturbation.
+
+ % Logic is as follows:
+ % if grad is 0.2 it means the Hurst changes by 0.2 for each unit
+ % of distance traversed in 2D parameter space. So if we need a
+ % perturbation of 0.3 we need to traverse 1.5 units in 2D space.
+ % However, the timescale parameter indicates distance in the x direction,
+ % whereas the above distance is travelled in the direction indicated by
+ % upslope_angle_deg. Thus we need to multiply by cos(angle) to get the
+ % distance on the x-axis.
+
+ NumPert = size(list_of_perturbations, 2);
+ for iPert = 1:NumPert
+ ThisPert = list_of_perturbations(iPert);
+ DistTravelled_angle = ThisPert / grad;
+ DistTravelled_xdir = DistTravelled_angle * cos(upslope_angle_deg / (180/pi()));
+ timescale_param(iPert) = param_timescale_hist + DistTravelled_xdir;
+ end
+
+end
+
+function [amplitude, Hurst] = GetAmplitude(HistoricData, info, TS_P_ann_HighFreq, HistoricHurst, TS_rand, timescales, perturbations)
+
+ SubareaList = info.SubareaList';
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ % initialisation
+ pos = strcmp(HistoricHurst.subarea, subarea{:});
+ ThisHistHurst = HistoricHurst.HistoricHurst(pos);
+ amplitude.(subarea{:}) = []; Hurst.(subarea{:}) = [];
+
+ NumPert = size(perturbations, 2);
+ for iPert = 1:NumPert
+
+ % key values for input into search algorithm
+ ThisPert = perturbations(iPert);
+ TargetHurst = ThisHistHurst + ThisPert;
+ ThisParamTimescale = timescales(iPert);
+
+ % given the above, we now conduct a search to identify the
+ % value of the amplitude parameter that gives us TargetHurst.
+ fun = @(param_amplitude) abs(TargetHurst - GetStochHurstGivenPars(ThisParamTimescale, param_amplitude, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea));
+ initial_val = 50;
+ options = optimset('TolFun',1e-3, 'TolX', 1e-2);
+ ThisParamAmplitude = fminsearch(fun, initial_val, options);
+
+ % store information in function output tables
+ amplitude.(subarea{:})(end+1) = ThisParamAmplitude;
+ Hurst. (subarea{:})(end+1) = TargetHurst;
+
+ end
+ disp(['Done for subarea ' subarea{:}])
+ end
+end
+
+function LowFreq_PreAnalysis_Outputs = CreateFinalOutput(output_table, TS_rand, info)
+
+ SubareaList = info.SubareaList';
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+ perturbation = output_table.perturbation';
+ Hurst_NewVal = output_table.Hurst.(subarea{:})';
+ param_timescale = output_table.timescale';
+ param_amplitude = output_table.amplitude.(subarea{:})';
+
+ LowFreq_PreAnalysis_Outputs.(subarea{:}) = table(perturbation, Hurst_NewVal, param_timescale, param_amplitude);
+ end
+ LowFreq_PreAnalysis_Outputs.TS_rand = TS_rand;
+
+end
+
+function extra = CreateExtraOutput(info, DirectionalTestResults, upslope_angle_deg, grad)
+ extra.test_distance = info.pars.test_distance;
+ extra.DirectionalTestResults = DirectionalTestResults;
+ extra.upslope_angle_deg = upslope_angle_deg;
+ extra.grad = grad;
+end
+
+function [HurstChange, NewHurst] = GetHurstChangeForSlope(angle, param_timescale_hist, param_amplitude_hist, HistHurst, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea)
+
+ % determine the change in Hurst if we traverse a set distance** in the
+ % direction specified. This code is best read by visualing the 2D
+ % space with the timescale parameter on the x-axis and the amplitude
+ % parameter * 0.01 on the y-axis.
+
+ % ** for the set distance we use one tenth of a unit
+
+ param_timescale_test = param_timescale_hist + info.pars.test_distance*cos(angle / (180/pi()));
+ param_amplitude_test = 100*((param_amplitude_hist/100) + info.pars.test_distance*sin(angle / (180/pi())));
+
+ NewHurst = GetStochHurstGivenPars(param_timescale_test, param_amplitude_test, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea);
+
+ HurstChange = NewHurst - HistHurst;
+
+end
+
+function NumCrossings = GetNumZeroCrossings(ts)
+ NumCrossings=0;
+ for k=2:size(ts,1)
+ if ts(k) * ts(k-1)<0
+ NumCrossings=NumCrossings+1;
+ end
+ end
+end
+
+function [StochHurst, TS_BrokenLine] = GetStochHurstGivenPars(param_timescale, param_amplitude, TS_rand, info, TS_P_ann_HighFreq, HistoricData, subarea)
+
+ % generate the stochastic timeseries for the low frequency component,
+ % using the Broken Line process with the parameter values specified
+ TS_BrokenLine = GenTS_BrokenLine(param_timescale, param_amplitude, TS_rand, info.pars);
+
+ % sum the stochastic low and high frequency components together, along with the mean annual precipitation
+ TotalP = TS_BrokenLine' + TS_P_ann_HighFreq.(subarea{:}) + mean(HistoricData.precip_annual.(subarea{:}));
+
+ % calculate the Hurst value of the combined timeseries
+ % StochHurst = GetHurstMean(TotalP, size(HistoricData.precip_annual, 1));
+ StochHurst = GetHurstMean(TotalP, size(HistoricData.precip_annual, 1));
+end
+
+function HurstCoeff = GetHurstMean(data, HistSeriesLength)
+
+ % get an average Hurst Coefficient for an arbitrarily long period, by
+ % splitting it into shorter segments of equal length to the historic
+ % data and calculating the Hurst Coefficient separately for each of the
+ % segments, then taking the average.
+
+
+ len = size(data, 1);
+
+ ind(1) = 1; this_count = 1;
+ for i = 2:len
+ if this_count == HistSeriesLength % divide into segments of length HistSeriesLength
+ ind(i) = ind(i-1)+1; this_count = 1;
+ else
+ ind(i) = ind(i-1); this_count = this_count + 1;
+ end
+ end
+
+ % calculate the number of segments
+ NumSegments = max(ind);
+
+ % calculate the Hurst coefficient for each of the segments separately
+ for iSegment = 1:NumSegments
+ ind2 = ind==iSegment;
+ Hurst_all(iSegment) = GetHurst(data(ind2), 'PlottingOff');
+ end
+
+ HurstCoeff = mean(Hurst_all);
+
+end
+
+function HurstCoeff = GetHurst(data, PlotToggle)
+
+ % created 16/02/2020 by Keirnan Fowler, University of Melbourne
+
+ % calculate the original version of the Hurst Coefficient (ie. the one
+ % hydrologists use, not economists). Only input is a timeseries of
+ % data.
+
+ % divide into partial periods
+ [NumDurations, durations, NumPartials, ind] = GetPartials(data);
+
+ % calculate rescaled range for each partial period
+ RSranges = GetRescaledRanges(data, ind, NumDurations, NumPartials);
+
+ % calculate Hurst exponent by fitting power law to results
+ % and, optionally, produce plots
+ HurstCoeff = GetHurstAndPlot(RSranges, durations, NumDurations, NumPartials, PlotToggle);
+
+end
+
+function [NumDurations, durations, NumPartials, ind] = GetPartials(data)
+
+ n = size(data, 1);
+
+ % Assemble information about segment duration when the full period is
+ % divided into N, N/2, N/4, N/8 etc...
+
+ NextDuration = n; NumDivs = 1; iDuration = 0;
+ while NextDuration>=2
+
+ % information about this duration
+ iDuration = iDuration + 1;
+ durations(iDuration) = NextDuration;
+ NumPartials(iDuration) = floor(n/durations(iDuration));
+
+ % information about the next shortest duration (we will not proceed
+ % unless there are sufficient years)
+ NumDivs = NumDivs*2;
+ NextDuration = floor(n/NumDivs);
+ end
+ NumDurations = iDuration;
+
+
+ % create an array (ind) that indicates the years that belong to each segment
+ ind = zeros(NumDurations, n);
+ for iDuration = 1:NumDurations
+ pos_end= 0;
+ for iPartial = 1:NumPartials(iDuration)
+ pos_start = pos_end + 1;
+ pos_end = pos_start + durations(iDuration) - 1;
+ ind(iDuration, pos_start:pos_end) = iPartial;
+ end
+ end
+end
+
+function RSranges = GetRescaledRanges(data, ind, NumDurations, NumPartials)
+ n = size(data, 1);
+ for iDuration = 1:NumDurations
+ for iPartial = 1:NumPartials(iDuration)
+
+ % get this partial timeseries
+ ThisInd = find(ind(iDuration, :)==iPartial);
+ ThisTS = data(ThisInd);
+
+ % calculate the mean and standard deviation
+ ThisMean = mean(ThisTS); ThisSD = std(ThisTS);
+
+ % create a mean-adjusted series
+ ThisTS = ThisTS - ThisMean;
+
+ % calculate the cumulative deviate series
+ ThisTS = cumsum(ThisTS);
+
+ % compute the range
+ ThisRange = range(ThisTS);
+
+ % compute the rescaled range
+ RSranges(iDuration, iPartial) = ThisRange / ThisSD;
+ end
+ end
+end
+
+function HurstCoeff = GetHurstAndPlot(RSranges, durations, NumDurations, NumPartials, PlotToggle)
+
+ %% Get Hurst exponent
+
+ % get relevant ordinates in log-log space
+ for iDuration = 1:NumDurations
+ x(iDuration) = log(durations(iDuration));
+ y(iDuration) = log(mean(RSranges(iDuration, 1:NumPartials(iDuration))));
+ end
+
+ % line of best fit
+ [m, c] = LinReg_get_m_and_c(x, y);
+
+ % Hurst exponent is slope of log-log line
+ HurstCoeff = m;
+
+ %% optional - do plotting
+ DoPlots = false;
+ if strcmp(PlotToggle, 'PlottingOn'), DoPlots = true; end
+
+ if DoPlots
+
+ % remember the x and y values we just calculated
+ x_log = x; y_log = y;
+
+ % initiate figure
+ figure;
+
+ % subplot 1: rescaled ranges versus partial record length
+ subplot(1, 2, 1); hold on;
+
+ % individual values for each instance of a partial record
+ for iDuration = 1:NumDurations
+ x = repmat(durations(iDuration), NumPartials(iDuration), 1);
+ y = RSranges(iDuration, 1:NumPartials(iDuration));
+ scatter(x, y);
+ end
+
+ % average values for each value of record length (unlogged)
+ x = []; y = [];
+ for iDuration = 1:NumDurations
+ x(iDuration) = durations(iDuration);
+ y(iDuration) = mean(RSranges(iDuration, 1:NumPartials(iDuration)));
+ end
+ scatter(x, y, 30, 'k', 'filled');
+ grid on; xlabel('length of partial record (years)'); ylabel('rescaled range (-)');
+
+ % subplot 2: averages only, in log-log space, with fitted line
+ subplot(1, 2, 2); hold on;
+ scatter(x_log, y_log, 30, 'k', 'filled');
+ grid on; xlabel('log_{n}(length in years)'); ylabel('log_{n}(rescaled range)')
+
+ % line of best fit
+ plot([1.1 5], [m*1.1+c, m*5+c], '-r');
+
+ % labelling
+ text(3, 1, ['Hurst coefficient = ' num2str(HurstCoeff)]);
+ end
+
+end
+
+function [m, c] = LinReg_get_m_and_c(x, y)
+ P = polyfit(x,y,1);
+ m = P(1); c = P(2);
+end
diff --git a/Matalas_apply.m b/Matalas_apply.m
new file mode 100644
index 0000000..5facd55
--- /dev/null
+++ b/Matalas_apply.m
@@ -0,0 +1,160 @@
+function Matalas_RawOutput = Matalas_apply(A, B, TotalYrs, info)
+
+ % Generate a single replicate using the Matalas method, based on
+ % pre-calculated matrices.
+
+ LenRep = TotalYrs;
+
+ NumSite = size(A, 1); ns_check = size(A, 2);
+ if NumSite~=ns_check, error('Error: input matrix is not square.'); end
+
+ X1(1:NumSite) = 0.0;
+ X2(1:NumSite) = 0.0;
+ for i = 1:LenRep
+ rn = rand(1, 8);
+ for j = 1:NumSite
+ xrn = rn(j);
+ % Zstd(j,i) = ppnd(xrn); % tried and failed to convert function ppnd from Fortran to Matlab (see below)
+ Zstd(j,i) = norminv(xrn);
+ sij = 0.0;
+ for k = 1:NumSite
+ sij = sij + A(j,k) * X1(k);
+ end
+ for k = 1:j
+ sij = sij + B(j,k)*Zstd(k,i);
+ end
+ X2(j) = sij;
+ end
+
+ tf = isreal(X2);
+ if ~tf
+ test01 = 1;
+ end
+
+ for j = 1:NumSite
+ X1(j) = X2(j);
+ V(j,i) = X2(j);
+ % if (flag.eq.'N') then
+ % V(j,i) = X2(j);
+ % elseif (flag.eq.'S') then
+ % V(j,i) = sdt(j) * X2(j)
+ % elseif (flag.eq.'L') then
+ % V(j,i) = av(j) + sdt(j) * X2(j)
+ % endif
+ end
+ end
+
+ tf = isreal(V);
+ if ~tf
+ test01 = 1;
+ end
+
+ % table format for output
+ Matalas_RawOutput = array2table(V', 'VariableNames', info.SubareaList);
+
+end
+
+% function NormalDeviate = ppnd(P)
+%
+% % Created 04/10/2019 by Keirnan Fowler, University of Melbourne
+% % I'm sure there's an equivalent procedure in Matlab but I want this
+% % to be identical to Fortran code provided by Rory Nathan.
+%
+% % ORIGINAL CODE
+%
+% % REAL FUNCTION PPND(P,IFAULT)
+% % !
+% % ! PRODUCES NORMAL DEVIATE CORRESPONDING TO LOWER TAIL AREA OF P.
+% % ! ALGORITHM AS 111 APPLIED STATISTICS (1977) VOL. 26, P 118
+% % !
+% % REAL ZERO, SPLIT, HALF, ONE, A0, A1, A2, A3, B1, B2, B3, B4
+% % REAL C0, C1, C2, C3, D1, D2, P, Q, R, ZABS, ZLOG, ZSQRT
+% % !
+% % DATA ZERO, HALF, ONE, SPLIT /0.0E0, 0.5E0, 1.0E0, 0.42E0/
+% % DATA A0 / 2.50662 82388 4E0/, &
+% % A1 / -18.61500 06252 9E0/, &
+% % A2 / 41.39119 77353 4E0/, &
+% % A3 / -25.44106 04963 7E0/, &
+% % B1 / -8.47351 09309 0E0/, &
+% % B2 / 23.08336 74374 3E0/, &
+% % B3 / -21.06224 10182 6E0/, &
+% % B4 / 3.13082 90983 3E0/
+% % !
+% % ! HASH SUM AB 143.70383 55807 6
+% % !
+% % DATA C0 / -2.78718 93113 8E0/, &
+% % C1 / -2.29796 47913 4E0/, &
+% % C2 / 4.85014 12713 5E0/, &
+% % C3 / 2.32121 27685 8E0/, &
+% % D1 / 3.54388 92476 2E0/, &
+% % D2 / 1.63706 78189 7E0/
+% % !
+% % ! HASH SUM CD 17.43746 52092 4
+% % !
+% % ZABS(P) = ABS(P)
+% % ZLOG(P) = ALOG(P)
+% % ZSQRT(P) = SQRT(P)
+% % !
+% % IFAULT = 0
+% % Q = P - HALF
+% % IF (ZABS(Q) .GT. SPLIT) GOTO 1
+% % R = Q * Q
+% % PPND = Q * (((A3 * R + A2) * R + A1) * R + A0) / &
+% % ((((B4 * R + B3) * R + B2) * R + B1) * R + ONE)
+% % RETURN
+% % 1 R = P
+% % IF (Q .GT. ZERO) R = ONE - P
+% % IF (R .LT. ZERO) GOTO 2
+% % R = ZSQRT(-ZLOG(R))
+% % PPND = (((C3 * R + C2) * R + C1) * R + C0) / ((D2 * R + D1) * R + ONE)
+% % IF (Q .LT. ZERO) PPND = -PPND
+% % RETURN
+% % 2 IFAULT = 1
+% % PPND = ZERO
+% % RETURN
+% % END
+%
+% split = 0.42;
+% A0 = [ 2.50662 82388 4E0];
+% A1 = [-18.61500 06252 9E0];
+% A2 = [ 41.39119 77353 4E0];
+% A3 = [-25.44106 04963 7E0];
+% B1 = [ -8.47351 09309 0E0];
+% B2 = [ 23.08336 74374 3E0];
+% B3 = [-21.06224 10182 6E0];
+% B4 = [ 3.13082 90983 3E0];
+%
+% % HASH SUM AB 143.70383 55807 6
+%
+% C0 = [-2.78718 93113 8E0];
+% C1 = [-2.29796 47913 4E0];
+% C2 = [ 4.85014 12713 5E0];
+% C3 = [ 2.32121 27685 8E0];
+% D1 = [ 3.54388 92476 2E0];
+% D2 = [ 1.63706 78189 7E0];
+%
+% % HASH SUM CD 17.43746 52092 4
+%
+% % ZABS(P) = abs(P);
+% % ZLOG(P) = log(P);
+% % ZSQRT(P) = sqrt(P);
+%
+% Q = P - 0.5;
+% if abs(Q) > split
+% R = P;
+% if Q > 0.0, R = 1 - P; end
+% if R < 0.0
+% error('IFAULT = 1');
+% else
+% R = sqrt(-log(R));
+% PPND = (((C3 * R + C2) * R + C1) * R + C0) / ((D2 * R + D1) * R + 1.0);
+% if Q < 0, PPND = -PPND; end
+% end
+% else
+% R = Q * Q;
+% PPND = Q * (((A3 * R + A2) * R + A1) * R + A0) / ((((B4 * R + B3) * R + B2) * R + B1) * R + 1.0);
+% end
+%
+% NormalDeviate = PPND;
+%
+% end
diff --git a/Matalas_define.m b/Matalas_define.m
new file mode 100644
index 0000000..ad58042
--- /dev/null
+++ b/Matalas_define.m
@@ -0,0 +1,187 @@
+function [M0, M1, A, B, C] = Matalas_define(Matalas_RawData)
+
+ % first, convert table to array
+ RawArray = table2array(Matalas_RawData);
+
+ % lag-0 (M0) population covariance matrix
+ lag = 0; M0 = XCovMat (RawArray', lag);
+
+ % lag-1 (M1) population covariance matrix
+ lag = 1; M1 = XCovMat (RawArray', lag);
+
+ % compute matrix A
+ M0I = M0;
+ M0I_inv = MatInv_GaussJordan(M0I);
+ A = mtimes(M1, M0I_inv); % could also write A = M1*M0I
+
+ % compute matrix C
+ D1 = mtimes(M1, M0I_inv); % not clear on how this is different to A
+ M1T = MatTrans(M1);
+ D2 = mtimes(D1,M1T);
+ C = M0 - D2;
+
+ % compute matrix B
+ B = GetMatrixB(C);
+
+end
+
+function OutMatrix = XCovMat (InMatrix, lag)
+
+ % computes covariance matrix between X(t) and X(lag t)
+ % I suspect Matlab can do this more natively than the
+ % below brute force code... but I want to make it identical
+ % where possible to Rory's Fortran.
+
+ ns = size(InMatrix, 1); % number of sites
+ num = size(InMatrix, 2); % number of data points (years)
+
+ for i = 1:ns
+ for j = 1:ns
+ sij = 0.0;
+ ks = 1 + lag;
+ for k = ks:num
+ sij = sij + InMatrix(i,k) * InMatrix(j,k-lag);
+ end
+ a(i,j) = sij / num;
+ end
+ end
+
+ OutMatrix = a;
+
+end
+
+function OutMatrix = MatInv_GaussJordan(InMatrix)
+
+ % Computes inverse and determinant of matrix A by the Gauss-Jordan method
+
+ a = InMatrix;
+ ns = size(a, 1); ns_check = size(a, 2);
+ if ns~=ns_check, error('Error: input matrix is not square.'); end
+ ipiv(1:ns) = 0;
+
+ for i = 1:ns
+ big = 0.0;
+ for j = 1:ns
+ if (ipiv(j)~=1)
+ for k = 1:ns
+ if ipiv(k)==0
+ if abs(a(j,k))>=big
+ big = abs(a(j,k));
+ irow = j;
+ icol = k;
+ end
+ elseif ipiv(k)>1
+ error('Matrix M0 is singular - inverse cannot be found');
+ end
+ end
+ end
+ end
+ ipiv(icol) = ipiv(icol) + 1;
+ if (irow~=icol)
+ for l = 1:ns
+ dum = a(irow,l);
+ a(irow,l) = a(icol,l);
+ a(icol,l) = dum;
+ end
+ end
+ indxr(i) = irow;
+ indxc(i) = icol;
+ if a(icol,icol)==0.0
+ error('Matrix M0 is singular - inverse cannot be found');
+ end
+ pivinv = 1.0 / a(icol,icol);
+ a(icol,icol) = 1.0;
+ for l = 1:ns
+ a(icol,l) = a(icol,l) * pivinv;
+ end
+ for ll = 1:ns
+ if ll~=icol
+ dum = a(ll,icol);
+ a(ll,icol) = 0.0;
+ for l = 1:ns
+ a(ll,l) = a(ll,l) - a(icol,l) * dum;
+ end
+ end
+ end
+ end
+
+ for l = ns:-1:1
+ if indxr(l)~=indxc(l)
+ for k = 1:ns
+ dum = a(k,indxr(l));
+ a(k,indxr(l)) = a(k,indxc(l));
+ a(k,indxc(l)) = dum;
+ end
+ end
+ end
+
+ OutMatrix = a;
+
+end
+
+function OutMatrix = MatTrans(InMatrix)
+
+ % computes transpose matrix (B) of input matrix (A).
+ a = InMatrix;
+
+ ns = size(a, 1); ns_check = size(a, 2);
+ if ns~=ns_check, error('Error: input matrix is not square.'); end
+
+ for i = 1:ns
+ for j = 1:ns
+ b(j,i) = a(i,j);
+ end
+ end
+
+ OutMatrix = b;
+end
+
+function OutMatrix = GetMatrixB(InMatrix)
+
+ % compute matrix B based on input of matrix C
+ % (kf: I legit don't have more info than this)
+
+ c = InMatrix;
+ ns = size(c, 1); ns_check = size(c, 2);
+ if ns~=ns_check, error('Error: input matrix is not square.'); end
+
+ i = 1;
+ j = 1;
+ if c(1,1)<=0.0
+ error('Unable to determine matrix B');
+ end
+ b(1,1) = sqrt(c(1,1));
+ for i = 2:ns
+ b(i,1) = c(i,1) / b(1,1);
+ if i~=2
+ for j = 2:i-1
+ sij = 0.0;
+ for k = 1:j-1
+ sij = sij + (b(i,k) * b(j,k));
+ end
+ if b(j,j)<=0.0
+ b(i,j) = 0.0;
+ else
+ b(i,j) = (c(i,j) - sij) / b(j,j);
+ end
+ end
+ end
+ sij = 0.0;
+ for j = 1:i-1
+ sij = sij + (b(i,j) * b(i,j));
+ end
+ if (c(i,i)-sij)<=0.0
+ b(i,i) = 0.0;
+ else
+ b(i,i) = sqrt(c(i,i) - sij);
+ end
+ end
+ % set zero elements of b
+ for i = 1:ns-1
+ for j = i+1:ns
+ b(i,j) = 0.0;
+ end
+ end
+
+ OutMatrix = b;
+end
\ No newline at end of file
diff --git a/PerturbP_deltaLowFreqP.m b/PerturbP_deltaLowFreqP.m
new file mode 100644
index 0000000..0248aa0
--- /dev/null
+++ b/PerturbP_deltaLowFreqP.m
@@ -0,0 +1,96 @@
+
+function [TS_P_ann_interim, extra_b] = PerturbP_deltaLowFreqP(TS_P_ann_HighFreq, HistoricData, deltaLowFreqP, info)
+
+ % Created January 2021 by Keirnan Fowler, University of Melbourne
+
+ % This code creates the synthetic annual rainfall timeseries such that
+ % the Hurst coefficient is greater or less than historic, in line with
+ % perturbation indicated by deltaLowFreqP. A higher Hurst coefficient
+ % means greater persistence in the timeseries which means consecutive
+ % values are more dependant on one another and thus multi-year droughts
+ % will tend to be longer, as will multi-year runs of wet conditions.
+
+ % Possible outcomes:
+ % deltaLowFreqP = 0 means no change to historic Hurst values;
+ % deltaLowFreqP > 0 means Hurst value goes up by the specified amount;
+ % deltaLowFreqP < 0 means Hurst value goes down by the specified amount.
+
+ % The 'levers' that this code can 'pull' to acheive the desired Hurst
+ % coefficient are the parameters of the model used to generate the low
+ % frequency component of the rainfall. The high frequency component is
+ % pre-generated and is passed to this function in TS_P_ann_HighFreq.
+ % All Hurst values (whether the context is synethetic data or observed)
+ % are calculated on total precipitation rather than the high frequency
+ % or low frequency components separately.
+
+ % The model used to generate the low frequency component is the Broken
+ % Line process. The two parameters are:
+ % - parameter 1, timescale parameter.
+ % - parameter 2, amplitude parameter (standard deviation).
+ % Both parameters must be tuned to give the required response to a given
+ % perturbation. This code assumes a prior routine called
+ % LowFreq_PreAnalysis.m has pre-generated lookup tables of parameters to
+ % adopt for a given perturbation. For the present function, it remains
+ % only to apply these parameters to generate the stochastic data.
+
+ % for each subarea
+ SubareaList = info.SubareaList'; TS_P_ann_LowFreq = []; TS_P_ann_interim = [];
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ % get the parameter values to use (with reference to pre-populated tables)
+ [param_amplitude, param_timescale] = GetParams(subarea, deltaLowFreqP, info);
+
+ % apply the parameter values to generate the low frequency component
+ TS_P_ann_LowFreq(:, end+1) = GenTS_BrokenLine(param_timescale, param_amplitude, info.LowFreq_PreAnalysis_Outputs.TS_rand, info.pars);
+
+ % add high and low freqency components together with the long term mean, to get the total rainfall
+ TS_P_ann_interim(:, end+1) = P_finalise(TS_P_ann_LowFreq(:, end), TS_P_ann_HighFreq.(subarea{:}), HistoricData.precip_annual.(subarea{:}));
+
+ % remember the value of amplitude and frequency used
+ ParamArchive.amplitude.(subarea{:}) = param_amplitude;
+ ParamArchive.timescale.(subarea{:}) = param_timescale;
+
+ end
+
+ % very rarely, we may get a negative number - check for this
+ TS_P_ann_interim(:, :) = max(0.1, TS_P_ann_interim(:, :)); % instead of the negative number, assign the year 0.1 mm of rainfall
+
+ % format outputs as tables
+ TS_P_ann_LowFreq = array2table(TS_P_ann_LowFreq, 'VariableNames', SubareaList);
+ TS_P_ann_interim = array2table(TS_P_ann_interim, 'VariableNames', SubareaList);
+
+ extra_b.ParamArchive = ParamArchive;
+ extra_b.TS_P_ann_LowFreq = TS_P_ann_LowFreq;
+
+end
+
+function [param_amplitude, param_timescale] = GetParams(subarea, deltaLowFreqP, info)
+
+ LookupTable = info.LowFreq_PreAnalysis_Outputs.(subarea{:});
+
+ P = polyfit(LookupTable.perturbation, LookupTable.param_amplitude, 1); param_amplitude = P(1)*deltaLowFreqP + P(2);
+ P = polyfit(LookupTable.perturbation, LookupTable.param_timescale, 1); param_timescale = P(1)*deltaLowFreqP + P(2);
+
+ % note, the reason that we use linear regression not interpolation (or
+ % simply reading off the value!) is that random fluctuations when generating
+ % the stochastic replicates of the Broken Line process cause irregularities
+ % in the relationship, even to the point where it is non-monotonic. To
+ % ensure monotonicity, we take the line of best fit.
+
+end
+
+function CombinedTimeseries = P_finalise(TS_P_ann_LowFreq, TS_P_ann_HighFreq, HistoricData)
+
+ LongTermMean = mean(HistoricData);
+ CombinedTimeseries = TS_P_ann_LowFreq ... % stochastic low frequency component from Broken Line process
+ + TS_P_ann_HighFreq ... % stochastic high frequency component from Matalas
+ + LongTermMean; % historic long term mean
+
+ % If you are familiar with EMD you will know that EMD also provides a
+ % residual which captures the trend in the data (if any). Often in EMD
+ % studies, such trends may be added back to the stochastic data.
+ % However, here it is appropriate that it be removed and not inform the
+ % final stochastic process, because no stress test stochastic timeseries
+ % ever has a trend in it - they are all stationary by design. Thus, we
+ % are interested only in oscillations, not trends.
+end
diff --git a/PerturbPmean.m b/PerturbPmean.m
new file mode 100644
index 0000000..1167909
--- /dev/null
+++ b/PerturbPmean.m
@@ -0,0 +1,13 @@
+function [TS_P_ann, extra_d] = PerturbPmean(TS_P_ann_interim, deltaP, info)
+
+ % created 08/10/2019 by Keirnan Fowler, University of Melbourne
+
+ % apply scaling factor
+ TS_P_ann = table2array(TS_P_ann_interim)*(1+deltaP);
+
+ % convert TS_P_ann to a table
+ TS_P_ann = array2table(TS_P_ann, 'VariableNames', info.SubareaList);
+
+ extra_d = [];
+
+end
\ No newline at end of file
diff --git a/PerturbT.m b/PerturbT.m
new file mode 100644
index 0000000..af09702
--- /dev/null
+++ b/PerturbT.m
@@ -0,0 +1,13 @@
+function [TS_T_ann, extra_e] = PerturbT(TS_T_ann_interim, deltaT, info)
+
+ % created 08/10/2019 by Keirnan Fowler, University of Melbourne
+
+ % add the temperature
+ TS_T_ann = table2array(TS_T_ann_interim)+deltaT;
+
+ % convert TS_T_ann to a table
+ TS_T_ann = array2table(TS_T_ann, 'VariableNames', info.SubareaList);
+
+ extra_e = [];
+
+end
\ No newline at end of file
diff --git a/RunWapaba.m b/RunWapaba.m
new file mode 100644
index 0000000..b80fd22
--- /dev/null
+++ b/RunWapaba.m
@@ -0,0 +1,128 @@
+function [SimFlows, SimVars] = RunWapaba(ParVals, rain, PET, month)
+
+ % Created 10/05/2018 by Connor McCutcheon and Keirnan Fowler
+
+ % Purpose: run Wapaba model with specified timeseries inputs and model parameters
+
+ % Model Parameters
+ a1 = ParVals.a1;
+ a2 = ParVals.a2;
+ B = ParVals.B;
+ Smax = ParVals.Smax;
+ K = ParVals.K;
+
+ % initialise size of arrays
+ NumTimesteps = size(rain, 1);
+ SimFlows (1:NumTimesteps) = -99.99;
+ T (1:NumTimesteps) = -99.99;
+ SimVars.Flows.Qb (1:NumTimesteps) = -99.99;
+ SimVars.Flows.Qs (1:NumTimesteps) = -99.99;
+ SimVars.States.S (1:NumTimesteps) = -99.99;
+ SimVars.States.G (1:NumTimesteps) = -99.99;
+ SimVars.Flux.ET (1:NumTimesteps) = -99.99;
+ SimVars.Flux.R (1:NumTimesteps) = -99.99;
+
+ % Step 1: initialise model states
+ S = 0.01*Smax;
+ G = 0;
+
+ % Step 2: loop to run model for each timestep
+ for iTS = 1:NumTimesteps
+
+ P = rain(iTS);
+
+ % Define T value for each month
+ if month (iTS) == 1 || 3 || 5 || 7 || 8 || 10 || 12
+ T (iTS) = 31;
+ elseif month (iTS) == 2
+ T (iTS) = 28;
+ else
+ T (iTS) = 30;
+ end
+
+ %Catchment water consumption potential
+ X0 = PET(iTS) + (Smax - S);
+
+ %Consumption curve #1: Supply = rainfall, Demand = Catchment water
+ %consumption potential
+ F1 = 1+P/X0-(1+(P/X0).^a1).^(1/a1);
+
+ %Catchment water consumption
+ X = X0 * F1;
+
+ %Catchment water yield
+ Y = max(P - X,0);
+
+ %Total water available for evapotranspiration
+ W = S + X;
+
+ %Consumption curve #2: Supply = water available for ET, Demand = PET
+ F2 = 1+W/PET(iTS)-(1+(W/PET(iTS)).^a2).^(1/a2);
+
+ %Actual evapotranspiration
+ ET = F2 * PET(iTS);
+
+ %Recharge to groundwater store
+ R = max(B * Y,0);
+
+ %Surface Runoff
+ Qs = max(Y - R,0);
+
+ %Baseflow calculation
+ Z = 1-exp(-T(iTS)/K);
+ Qb1 = min(G*Z+R*(1-(K/T(iTS))*Z),G);
+ Qb= max(Qb1,0);
+
+ %Groundwater store
+ G = max(G+R-Qb,0);
+
+ %Soil moisture storage
+ S1 = max(W-ET,0);
+ S= min(S1,Smax);
+
+ %Accounting for soil moisture storage exceedance
+ if S1>Smax
+ Qs=Qs+(S1-Smax);
+ end
+
+ %Total Runoff
+ Q = Qs + Qb;
+
+ % Assign SimFlows and SimVars
+ SimFlows(iTS) = Q;
+ SimVars.States.S(iTS) = S;
+
+ % some optional tasks
+ RememberAdditionalVariables = false; % option to turn on if user requires
+ if RememberAdditionalVariables
+ SimVars.Flows.Qb (iTS) = Qb;
+ SimVars.Flows.Qs (iTS) = Qs;
+ SimVars.States.G(iTS) = G;
+ SimVars.Flux.ET(iTS) = ET;
+ SimVars.Flux.R(iTS) = R;
+ end
+
+ % optional water balance checking
+ WaterBalanceChecking = false; % option to turn on if user requires
+ if WaterBalanceChecking
+ A=iTS-1;
+ if A==0
+ SystemBal=0;
+ else
+ % SystemBal = round(P + SimVars.States.S(iTS-1) - SimVars.States.S(iTS) + SimVars.States.G(iTS-1) - SimVars.States.G(iTS) - SimVars.Flux.ET(iTS) - SimFlows(iTS),5);
+ SystemBal = round(P + LastTS.S - S + LastTS.G - G - ET - Q,5);
+ end
+ SimVars.Balance(iTS) = SystemBal;
+ end
+
+ if isreal(S) && isreal(G) && isreal(Q)
+ else
+ if ~exist('ComplexNumberDetected')
+ ParVals
+ error(['Complex values in simulated Q for timestep ' num2str(iTS) ' for above parameter set.']);
+ ComplexNumberDetected = true;
+ end
+ end
+ end
+
+end
\ No newline at end of file
diff --git a/SaveClimateAndFlow.m b/SaveClimateAndFlow.m
new file mode 100644
index 0000000..9b71383
--- /dev/null
+++ b/SaveClimateAndFlow.m
@@ -0,0 +1,32 @@
+
+function SaveClimateAndFlow(i, DataOut, deltaP, deltaT, deltaLowFreqP, deltaSeasonality, deltaRRrship_space, info)
+
+ if ~info.isOctave
+ % get the data relevant to this deltaRRrship
+ deltaRRrship = deltaRRrship_space(i);
+ ThisData = DataOut.(['deltaRRrship' num2str(i)]);
+
+ % separate out the items to be saved from the input structure
+ TS_Q_forModel = ThisData.TS_Q_ReachInflows;
+ TS_P = ThisData.TS_P;
+ TS_T = ThisData.TS_T;
+ TS_PET = ThisData.TS_PET;
+
+ % remember the perturbations
+ perturbations = struct('deltaP', deltaP, 'deltaT', deltaT, 'deltaLowFreqP', deltaLowFreqP, 'deltaSeasonality', deltaSeasonality, 'deltaRRrship', deltaRRrship);
+
+ % create name of file
+ OutFile_path = ['out\out_data\' 'StochasticClimate_' ...
+ num2str(deltaP, '%+3.2f') '_' ...
+ num2str(deltaT, '%+3.2f') '_' ...
+ num2str(deltaLowFreqP, '%+3.3f') '_' ...
+ num2str(deltaSeasonality, '%+3.2f') '_' ...
+ num2str(deltaRRrship, '%+3.2f') '.mat'];
+
+ % now save
+ save(OutFile_path, 'TS_Q_forModel', 'TS_P', 'TS_T', 'TS_PET', 'perturbations');
+ else
+ warning('Note: Octave is not currently able to save tables to .mat files. Users of Octave should code their own procedure to archive the output.');
+ end
+end
+
diff --git a/peelOffNameValueOptions.m b/peelOffNameValueOptions.m
new file mode 100644
index 0000000..4116a75
--- /dev/null
+++ b/peelOffNameValueOptions.m
@@ -0,0 +1,28 @@
+## Copyright (C) 2019 Andrew Janke
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; If not, see .
+
+function [opts, remainingArgs, peeledArgs] = peelOffNameValueOptions (args, knownOpts)
+ %PEELOFFNAMEVALUEOPTIONS Peel off name-value options from argins
+ %
+ % Peels off recognized name-value options from the end of an argument array.
+ opts = struct;
+ peeledArgs = {};
+ while numel (args) >= 2 && ischar (args{end-1}) && ismember (args{end-1}, knownOpts)
+ peeledArgs = [peeledArgs args(end-1:end)];
+ opts.(args{end-1}) = args{end};
+ args(end-1:end) = [];
+ end
+ remainingArgs = args;
+endfunction
diff --git a/readtable.m b/readtable.m
new file mode 100644
index 0000000..5992858
--- /dev/null
+++ b/readtable.m
@@ -0,0 +1,30 @@
+function outtable = readtable(filepath)
+
+ % created 13/02/2021 by Keirnan Fowler, University of Melbourne
+
+ % This is a *very basic* Octave readtable function to replicate readtable in Matlab.
+ % It is only intended to work with the data input files of the stochastic generation
+ % framework example and may not be extendable to other contexts.
+
+ % suppress various pesky Octave warning messages (ensure you check
+ % correct import though!)
+ warning('off','all');
+
+ % read the numerical data and column headers from the csv
+ ThisData = importdata(filepath, ',', 1);
+
+ % create separate vectors for each column
+ NumCols = size(ThisData.colheaders, 2); namestring = [];
+ for iCol = 1:NumCols
+ ColName = ThisData.colheaders{iCol};
+ eval([ColName ' = ThisData.data(:, iCol);']);
+ namestring = [namestring ColName ', '];
+ end
+
+ % create table from variables
+ eval(['outtable = table(' namestring(1:(end-2)) ');']);
+
+ % turn warnings back on
+ warning('default','all');
+
+end
\ No newline at end of file
diff --git a/split_high_low_using_CEEMDAN.m b/split_high_low_using_CEEMDAN.m
new file mode 100644
index 0000000..3314045
--- /dev/null
+++ b/split_high_low_using_CEEMDAN.m
@@ -0,0 +1,1248 @@
+function HistoricData = split_high_low_using_CEEMDAN(HistoricData, info)
+
+ % warning('off', 'Octave:missing-semicolon'); % should be off anyway by default
+
+ % high level settings specifying CEEMDAN scope and whether to plot results
+ PlotFigures = false; % set to true if you would like to see additional figures. Note, this does not include
+ % the figures in the paper, which are created later.
+ AnalyseRandomNoise = false; % enter 'true' to conduct an additional analysis that examines whether the observed IMF strengths
+ % are stronger than those obtained from random noise. This takes about 100 times longer (3-6 hours)
+ % but gives insight into whether the observed results could have arisen by chance.
+
+ % run the CEEMDAN algorithm
+ HistoricData.precip_annual_IMFs = RunCEEMDAN(HistoricData, AnalyseRandomNoise, PlotFigures, info);
+
+ % do a double aggregation:
+ % (i) for each subarea separately, aggregate across IMFs to create a single low
+ % frequency timeseries and a single high frequency timeseries for each subarea;
+ % (ii) for the low frequency only, aggregate across subareas to create a single timeseries.
+ [precip_ann_HighFreq, precip_ann_LowFreq] = AggregateIMFs(HistoricData, info.LowHighThresh, info);
+
+ % append CEEMDAN output to the data structure
+ HistoricData.precip_ann_HighFreq = precip_ann_HighFreq;
+ HistoricData.precip_ann_LowFreq = precip_ann_LowFreq;
+
+end
+
+function [IMFs_all, extra] = RunCEEMDAN(HistoricData, AnalyseRandomNoise, PlotFigures, info)
+
+ % this version created January 2021 by Keirnan Fowler, based on earlier
+ % code by Natasha Ballis, both of University of Melbourne.
+
+ % inputs 'AnalyseRandomNoise' and 'PlotFigures' are true/false and are
+ % used to indicate whether the user wants to run these parts of the code.
+
+ % for each subarea
+ SubareaList = info.SubareaList';
+ for subarea = SubareaList % equivalent of VBA "For Each subarea in SubareaList"
+
+ clearvars -except HistoricData AnalyseRandomNoise PlotFigures info subarea SubareaList IMFs_all extra
+ close all; % Clear all variables and close all figures associated with the previous iteration
+
+ % create blank structure to hold observed data for this subarea
+ R=struct('hist_data',[],'imfs',[],'imf_period',[],'imf_var',[]);
+
+ % populate observed data
+ R.hist_data = HistoricData.precip_annual.(subarea{:});
+
+ % observed stats
+ Obs_length=size(R.hist_data,1);
+ Obs_std=sqrt(var(R.hist_data,0)); % zero: sample variance
+
+ % algorithm settings for CEEMDAN v2014
+ Nstd = 0.4;
+ NR = 500;
+ MaxIter = 1000000;
+ SNRFlag = 1;
+
+ % run CEEMDAN v2014 for this subarea and process results
+ disp_toggle = false; % change to true if you want to see CEEMDAN warning messages
+ [imfs,its]=ceemdan_v2014(R.hist_data',Nstd,NR,MaxIter,SNRFlag, disp_toggle);
+ [imf_var, imf_period] = GetStats(imfs); % compute variance and average period of each IMF
+ R.imfs = imfs; R.imf_var = imf_var; R.imf_period = imf_period; R.its = its;
+
+ % save results to output structures
+ IMFs_all.(subarea{:}) = imfs;
+ extra.(subarea{:}) = R;
+
+ disp(['CEEMDAN complete for historic data for subarea ' subarea{:}]);
+
+ % now repeat for noise replicates (if required) (note, this takes a long time, usually > 5 hours)
+ if AnalyseRandomNoise
+
+ NumReps = 100;
+
+ % noise replicates
+ N=struct('Replicate',[],'imfs',[],'imf_period',[],'imf_var',[]);
+
+ % generate noise replicates
+ for iRep=1:NumReps
+ N(iRep).Replicate=Obs_std*2*sqrt(3)*rand(Obs_length,1); % uniform white noise
+ disp_toggle = false; % change to true if you want to see CEEMDAN warning messages
+ [imfs,its]=ceemdan_v2014(N(iRep).Replicate,Nstd,NR,MaxIter,SNRFlag, disp_toggle);
+ [imf_var, imf_period] = GetStats(imfs); % compute variance and average period of each IMF
+ N(iRep).imfs = imfs; N(iRep).imf_var = imf_var; N(iRep).imf_period = imf_period; N(iRep).its = its;
+ disp(['CEEMDAN complete for noise replicate ' num2str(iRep) ' of ' num2str(NumReps)]);
+ end
+
+ % once all reps are complete, save.
+ save(['CEEMDAN_output_incl_noise_reps_Subarea' subarea{:} '.mat']);
+
+ end
+
+ % Figure 1: Timeseries plotting
+ if PlotFigures
+ CreateFig1(R);
+ FigFileName = ['Fig1_Timeseries_Subarea' subarea{:}];
+ set(gcf, 'PaperUnits', 'centimeters'); set(gcf, 'PaperPosition', [0 0 20 23]);
+ saveas(gcf,FigFileName, 'png'); print('-painters','-depsc', FigFileName); close;
+
+ % Figure 2: Comparison with random noise replicates
+ if AnalyseRandomNoise
+ CreateFig2(R, N);
+ FigFileName = ['Fig2_IMF_freq_var_Subarea' subarea{:}];
+ set(gcf, 'PaperUnits', 'centimeters'); set(gcf, 'PaperPosition', [0 0 10 12]);
+ saveas(gcf,FigFileName, 'png'); print('-painters','-depsc', FigFileName); close;
+ end
+ end
+
+ end
+
+end
+
+function [precip_ann_HighFreq, precip_ann_LowFreq] = AggregateIMFs(HistoricData, LowHighThresh, info)
+
+ % do a double aggregation:
+ % (i) for each subarea separately, aggregate across IMFs to create a single low
+ % frequency timeseries and a single high frequency timeseries for each subarea;
+ % (ii) for the low frequency only, aggregate across subareas to create a single timeseries.
+
+ % initialise arrays that will hold output data
+ precip_ann_HighFreq_array = HistoricData.precip_annual.Year;
+ precip_ann_LowFreq_array = HistoricData.precip_annual.Year;
+
+ % (i) for each subarea separately, aggregate across IMFs to create a single low
+ % frequency timeseries and a single high frequency timeseries for each subarea;
+ NumSubareas = size(info.SubareaList, 1);
+ for iSubarea = 1:NumSubareas
+
+ % isolate IMFs for this subarea
+ ThisSubarea = info.SubareaList{iSubarea};
+ this_IMF_set = HistoricData.precip_annual_IMFs.(ThisSubarea);
+ this_IMF_set = this_IMF_set(1:(end-1), :); % remove last column (residual) - see note** below
+
+ % do the aggregatation across IMFs
+ high = sum(this_IMF_set(1:LowHighThresh, :));
+ low = sum(this_IMF_set((LowHighThresh+1):end, :));
+
+ % add to output table
+ precip_ann_HighFreq_array = [precip_ann_HighFreq_array high'];
+ precip_ann_LowFreq_array = [precip_ann_LowFreq_array low'];
+
+ end
+
+ % ** note: the last IMF is not actually an IMF, it's the residual from
+ % the EMD process. The EMD process progressively removes oscillations,
+ % highest frequency first, but to do so it requires that at least one
+ % full cycle be contained in the historic record. If the lowest
+ % frequency osscilation contains less than one full cycle, it is not a
+ % true IMF and gets lumped in with the residual which may also contain
+ % a long term trend. If a trend is present (eg. due to climate change
+ % in the historic record), it is appropriate that it be removed here
+ % and not inform the final stochastic process, because no stress test
+ % stochastic timeseries ever has a trend in it - they are all
+ % stationary by design. Thus, we are interested only in oscillations,
+ % not trends.
+
+ % (ii) for the low frequency only, aggregate across subareas to create a single timeseries.
+ precip_ann_LowFreq_array(:, end+1) = nan; % create new column at end
+ NumYears = size(precip_ann_HighFreq_array, 1);
+ for iYear = 1:NumYears
+ values = precip_ann_LowFreq_array(iYear, 2:(1+NumSubareas));
+ weights = info.SubAreaDetails.weighting_lowfreq';
+ adopted = sum(values.*weights) / sum(weights);
+ precip_ann_LowFreq_array(iYear, end) = adopted;
+ end
+
+ % convert to tables
+ precip_ann_HighFreq = array2table(precip_ann_HighFreq_array, 'VariableNames', ['Year'; info.SubareaList]);
+ precip_ann_LowFreq = array2table(precip_ann_LowFreq_array, 'VariableNames', ['Year'; info.SubareaList; 'adopted_combined']);
+
+
+end
+
+function [imf_var, imf_period] = GetStats(imfs)
+
+ Numimfs = size(imfs,1);
+
+ % compute variance for each mode
+ for iMode = 1:Numimfs, imf_var(iMode) = var(imfs(iMode,:),0); end % zero: sample variance
+
+ % compute average period of each mode by examining zero crossings
+ for iMode = 1:Numimfs
+ crossings=0; first=0; last=0;
+ for k=2:size(imfs,2) % k = position in column
+ if imfs(iMode,k) * imfs(iMode,k-1)<0
+ crossings=crossings+1;
+ last=k;
+ if isequal(first,0)
+ first=k;
+ end
+ end
+ end
+
+ if last>first
+ imf_period(iMode) = (last-first)/((crossings-1)/2); % assume one data point per one unit of length
+ else
+ imf_period(iMode) = NaN;
+ end
+ end
+end
+
+function CreateFig1(R)
+
+ % Plot historic precip
+ figure(1);
+ NumIMFs=size(R.imfs,1);
+ subplot(NumIMFs+1,1,1)
+ plot(R.hist_data,'r')
+ ylabel('hist data (mm/yr)','FontSize',7)
+ set(gca,'XTickLabel',[], 'FontSize', 8)
+ title('Historic annual precipitation (red) split into intrinsic mode functions (blue)')
+
+ % Now plot IMFs
+ for iMode=1:NumIMFs
+ subplot(NumIMFs+1,1,iMode+1)
+ plot(R.imfs(iMode,:))
+ set(gca,'XTickLabel',[])
+ ylabel(['IMF' num2str(iMode) ' (mm/yr)'],'FontSize',7)
+ set(gca, 'FontSize', 8)
+ grid on;
+ yl(iMode, 1:2) = ylim;
+ end
+
+ % standardise y axes across all IMFs
+ y_lim_adopt = max(max(abs(yl(1:(NumIMFs-1), :))));
+ for iMode=1:(NumIMFs-1)
+ subplot(NumIMFs+1,1,iMode+1)
+ ylim([-y_lim_adopt y_lim_adopt]);
+ end
+ subplot(NumIMFs+1,1,NumIMFs+1);
+ ylim([-y_lim_adopt y_lim_adopt]+(mean(yl(NumIMFs, :))));
+
+ ylabel('Residual (mm/yr)','FontSize',7) % Rename bottom y-axis
+
+end
+
+function CreateFig2(R, N)
+
+ % Figure 2: variance vs average period
+ figure(2); hold on
+
+ % plot results for random noise
+ for iMode=1:size(N,2), scatter (N(iMode).imf_period', N(iMode).imf_var,4,[0.75 0.75 0.75],'+'); end
+
+ % plot results for historic data
+ for iMode=1:length(R.imf_period(~isnan(R.imf_period))), scatter (R.imf_period(iMode),R.imf_var(iMode),18,'red','filled'); end
+
+ % formatting
+ ylabel('variance','FontSize',8)
+ xlabel('average period (years)','FontSize',8) % one data point per year
+ set(gca, 'FontSize', 8)
+ set(gca,'xscale','log')
+ set(gca,'yscale','log')
+ title('Intrinsic Mode Functions: Variance vs average period','FontSize',11,'Interpreter', 'none')
+
+end
+
+function [modes,its]=ceemdan_v2014(x,Nstd,NR,MaxIter,SNRFlag, disp_toggle) % kf added disp_toggle
+
+%Function for CEEMDAN
+
+% This code downloaded January 2021 by Keirnan Fowler from:
+% http://bioingenieria.edu.ar/grupos/ldnlys/metorres/metorres_files/ceemdan_v2014.m
+
+%WARNING: for this code works it is necessary to include in the same
+%directoy the file emd.m developed by Rilling and Flandrin.
+%This file is available at %http://perso.ens-lyon.fr/patrick.flandrin/emd.html
+%We use the default stopping criterion.
+%We use the last modification: 3.2007
+
+% Syntax
+
+%modes=ceemdan(x,Nstd,NR,MaxIter,SNRFlag)
+%[modes its]=ceemdan(x,Nstd,NR,MaxIter,SNRFlag)
+
+% Description
+
+%OUTPUT
+%modes: contain the obtained modes in a matrix with the rows being the modes
+%its: contain the sifting iterations needed for each mode for each realization (one row for each realization)
+
+%INPUT
+%x: signal to decompose
+%Nstd: noise standard deviation
+%NR: number of realizations
+%MaxIter: maximum number of sifting iterations allowed.
+%SNRFlag: if equals 1, then the SNR increases for every stage, as in [1].
+% If equals 2, then the SNR is the same for all stages, as in [2].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% The current is an improved version, introduced in:
+
+%[1] Colominas MA, Schlotthauer G, Torres ME. "Improve complete ensemble EMD: A suitable tool for biomedical signal processing"
+% Biomedical Signal Processing and Control vol. 14 pp. 19-29 (2014)
+
+%The CEEMDAN algorithm was first introduced at ICASSP 2011, Prague, Czech Republic
+
+%The authors will be thankful if the users of this code reference the work
+%where the algorithm was first presented:
+
+%[2] Torres ME, Colominas MA, Schlotthauer G, Flandrin P. "A Complete Ensemble Empirical Mode Decomposition with Adaptive Noise"
+% Proc. 36th Int. Conf. on Acoustics, Speech and Signa Processing ICASSP 2011 (May 22-27, Prague, Czech Republic)
+
+%Author: Marcelo A. Colominas
+%contact: macolominas@bioingenieria.edu.ar
+%Last version: 25 feb 2015
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+x=x(:)';
+desvio_x=std(x);
+x=x/desvio_x;
+
+modes=zeros(size(x));
+temp=zeros(size(x));
+aux=zeros(size(x));
+iter=zeros(NR,round(log2(length(x))+5));
+
+for i=1:NR
+ white_noise{i}=randn(size(x));%creates the noise realizations
+end;
+
+for i=1:NR
+ modes_white_noise{i}=emd(white_noise{i});%calculates the modes of white gaussian noise
+end;
+
+for i=1:NR %calculates the first mode
+ xi=x+Nstd*modes_white_noise{i}(1,:)/std(modes_white_noise{i}(1,:));
+ [temp, o, it]=emd(xi,'MAXMODES',1,'MAXITERATIONS',MaxIter);
+ temp=temp(1,:);
+ aux=aux+(xi-temp)/NR;
+ iter(i,1)=it;
+end;
+
+modes= x-aux; %saves the first mode
+medias = aux;
+k=1;
+aux=zeros(size(x));
+es_imf = min(size(emd(medias(end,:),'MAXMODES',1,'MAXITERATIONS',MaxIter)));
+
+while es_imf>1 %calculates the rest of the modes
+ for i=1:NR
+ tamanio=size(modes_white_noise{i});
+ if tamanio(1)>=k+1
+ noise=modes_white_noise{i}(k+1,:);
+ if SNRFlag == 2
+ noise=noise/std(noise); %adjust the std of the noise
+ end;
+ noise=Nstd*noise;
+ try
+ [temp,o,it]=emd(medias(end,:)+std(medias(end,:))*noise,'MAXMODES',1,'MAXITERATIONS',MaxIter);
+ catch
+ it=0;
+ if disp_toggle % added kf
+ disp('catch 1 '); disp(num2str(k));
+ end % added kf
+ temp=emd(medias(end,:)+std(medias(end,:))*noise,'MAXMODES',1,'MAXITERATIONS',MaxIter);
+ end;
+ temp=temp(end,:);
+ else
+ try
+ [temp, o, it]=emd(medias(end,:),'MAXMODES',1,'MAXITERATIONS',MaxIter);
+ catch
+ temp=emd(medias(end,:),'MAXMODES',1,'MAXITERATIONS',MaxIter);
+ it=0;
+ if disp_toggle % added kf
+ disp('catch 2 sin ruido')
+ end
+ end;
+ temp=temp(end,:);
+ end;
+ aux=aux+temp/NR;
+ iter(i,k+1)=it;
+ end;
+ modes=[modes;medias(end,:)-aux];
+ medias = [medias;aux];
+ aux=zeros(size(x));
+ k=k+1;
+ es_imf = min(size(emd(medias(end,:),'MAXMODES',1,'MAXITERATIONS',MaxIter)));
+end;
+modes = [modes;medias(end,:)];
+modes=modes*desvio_x;
+its=iter;
+
+end
+
+% all remaining functions were downloaded as January 2021 by Keirnan Fowler from:
+% http://perso.ens-lyon.fr/patrick.flandrin/pack_emd.zip
+% ref: 'emd.m'
+
+function [imf,ort,nbits] = emd(varargin)
+
+%EMD computes Empirical Mode Decomposition
+%
+%
+% Syntax
+%
+%
+% IMF = EMD(X)
+% IMF = EMD(X,...,'Option_name',Option_value,...)
+% IMF = EMD(X,OPTS)
+% [IMF,ORT,NB_ITERATIONS] = EMD(...)
+%
+%
+% Description
+%
+%
+% IMF = EMD(X) where X is a real vector computes the Empirical Mode
+% Decomposition [1] of X, resulting in a matrix IMF containing 1 IMF per row, the
+% last one being the residue. The default stopping criterion is the one proposed
+% in [2]:
+%
+% at each point, mean_amplitude < THRESHOLD2*envelope_amplitude
+% &
+% mean of boolean array {(mean_amplitude)/(envelope_amplitude) > THRESHOLD} < TOLERANCE
+% &
+% |#zeros-#extrema|<=1
+%
+% where mean_amplitude = abs(envelope_max+envelope_min)/2
+% and envelope_amplitude = abs(envelope_max-envelope_min)/2
+%
+% IMF = EMD(X) where X is a complex vector computes Bivariate Empirical Mode
+% Decomposition [3] of X, resulting in a matrix IMF containing 1 IMF per row, the
+% last one being the residue. The default stopping criterion is similar to the
+% one proposed in [2]:
+%
+% at each point, mean_amplitude < THRESHOLD2*envelope_amplitude
+% &
+% mean of boolean array {(mean_amplitude)/(envelope_amplitude) > THRESHOLD} < TOLERANCE
+%
+% where mean_amplitude and envelope_amplitude have definitions similar to the
+% real case
+%
+% IMF = EMD(X,...,'Option_name',Option_value,...) sets options Option_name to
+% the specified Option_value (see Options)
+%
+% IMF = EMD(X,OPTS) is equivalent to the above syntax provided OPTS is a struct
+% object with field names corresponding to option names and field values being the
+% associated values
+%
+% [IMF,ORT,NB_ITERATIONS] = EMD(...) returns an index of orthogonality
+% ________
+% _ |IMF(i,:).*IMF(j,:)|
+% ORT = \ _____________________
+% /
+% || X ||
+% i~=j
+%
+% and the number of iterations to extract each mode in NB_ITERATIONS
+%
+%
+% Options
+%
+%
+% stopping criterion options:
+%
+% STOP: vector of stopping parameters [THRESHOLD,THRESHOLD2,TOLERANCE]
+% if the input vector's length is less than 3, only the first parameters are
+% set, the remaining ones taking default values.
+% default: [0.05,0.5,0.05]
+%
+% FIX (int): disable the default stopping criterion and do exactly
+% number of sifting iterations for each mode
+%
+% FIX_H (int): disable the default stopping criterion and do sifting
+% iterations with |#zeros-#extrema|<=1 to stop [4]
+%
+% bivariate/complex EMD options:
+%
+% COMPLEX_VERSION: selects the algorithm used for complex EMD ([3])
+% COMPLEX_VERSION = 1: "algorithm 1"
+% COMPLEX_VERSION = 2: "algorithm 2" (default)
+%
+% NDIRS: number of directions in which envelopes are computed (default 4)
+% rem: the actual number of directions (according to [3]) is 2*NDIRS
+%
+% other options:
+%
+% T: sampling times (line vector) (default: 1:length(x))
+%
+% MAXITERATIONS: maximum number of sifting iterations for the computation of each
+% mode (default: 2000)
+%
+% MAXMODES: maximum number of imfs extracted (default: Inf)
+%
+% DISPLAY: if equals to 1 shows sifting steps with pause
+% if equals to 2 shows sifting steps without pause (movie style)
+% rem: display is disabled when the input is complex
+%
+% INTERP: interpolation scheme: 'linear', 'cubic', 'pchip' or 'spline' (default)
+% see interp1 documentation for details
+%
+% MASK: masking signal used to improve the decomposition according to [5]
+%
+%
+% Examples
+%
+%
+%X = rand(1,512);
+%
+%IMF = emd(X);
+%
+%IMF = emd(X,'STOP',[0.1,0.5,0.05],'MAXITERATIONS',100);
+%
+%T=linspace(0,20,1e3);
+%X = 2*exp(i*T)+exp(3*i*T)+.5*T;
+%IMF = emd(X,'T',T);
+%
+%OPTIONS.DISLPAY = 1;
+%OPTIONS.FIX = 10;
+%OPTIONS.MAXMODES = 3;
+%[IMF,ORT,NBITS] = emd(X,OPTIONS);
+%
+%
+% References
+%
+%
+% [1] N. E. Huang et al., "The empirical mode decomposition and the
+% Hilbert spectrum for non-linear and non stationary time series analysis",
+% Proc. Royal Soc. London A, Vol. 454, pp. 903-995, 1998
+%
+% [2] G. Rilling, P. Flandrin and P. Gonalves
+% "On Empirical Mode Decomposition and its algorithms",
+% IEEE-EURASIP Workshop on Nonlinear Signal and Image Processing
+% NSIP-03, Grado (I), June 2003
+%
+% [3] G. Rilling, P. Flandrin, P. Gonalves and J. M. Lilly.,
+% "Bivariate Empirical Mode Decomposition",
+% Signal Processing Letters (submitted)
+%
+% [4] N. E. Huang et al., "A confidence limit for the Empirical Mode
+% Decomposition and Hilbert spectral analysis",
+% Proc. Royal Soc. London A, Vol. 459, pp. 2317-2345, 2003
+%
+% [5] R. Deering and J. F. Kaiser, "The use of a masking signal to improve
+% empirical mode decomposition", ICASSP 2005
+%
+%
+% See also
+% emd_visu (visualization),
+% emdc, emdc_fix (fast implementations of EMD),
+% cemdc, cemdc_fix, cemdc2, cemdc2_fix (fast implementations of bivariate EMD),
+% hhspectrum (Hilbert-Huang spectrum)
+%
+%
+% G. Rilling, last modification: 3.2007
+% gabriel.rilling@ens-lyon.fr
+
+
+
+
+[x,t,sd,sd2,tol,MODE_COMPLEX,ndirs,display_sifting,sdt,sd2t,r,imf,k,nbit,NbIt,MAXITERATIONS,FIXE,FIXE_H,MAXMODES,INTERP,mask] = init(varargin{:});
+
+if display_sifting
+ fig_h = figure;
+end
+
+
+%main loop : requires at least 3 extrema to proceed
+while (~stop_EMD(r,MODE_COMPLEX,ndirs) && (k < MAXMODES+1 || MAXMODES == 0) && ~any(mask))
+
+ % current mode
+ m = r;
+
+ % mode at previous iteration
+ mp = m;
+
+ %computation of mean and stopping criterion
+ if FIXE
+ [stop_sift,moyenne] = stop_sifting_fixe(t,m,INTERP,MODE_COMPLEX,ndirs);
+ elseif FIXE_H
+ stop_count = 0;
+ [stop_sift,moyenne] = stop_sifting_fixe_h(t,m,INTERP,stop_count,FIXE_H,MODE_COMPLEX,ndirs);
+ else
+ [stop_sift,moyenne] = stop_sifting(m,t,sd,sd2,tol,INTERP,MODE_COMPLEX,ndirs);
+ end
+
+ % in case the current mode is so small that machine precision can cause
+ % spurious extrema to appear
+ if (max(abs(m))) < (1e-10)*(max(abs(x)))
+ if ~stop_sift
+ warning('emd:warning','forced stop of EMD : too small amplitude')
+ else
+ disp('forced stop of EMD : too small amplitude')
+ end
+ break
+ end
+
+
+ % sifting loop
+ while ~stop_sift && nbitMAXITERATIONS/5 && mod(nbit,floor(MAXITERATIONS/10))==0 && ~FIXE && nbit > 100)
+ disp(['mode ',int2str(k),', iteration ',int2str(nbit)])
+ if exist('s','var')
+ disp(['stop parameter mean value : ',num2str(s)])
+ end
+ [im,iM] = extr(m);
+ disp([int2str(sum(m(im) > 0)),' minima > 0; ',int2str(sum(m(iM) < 0)),' maxima < 0.'])
+ end
+
+ %sifting
+ m = m - moyenne;
+
+ %computation of mean and stopping criterion
+ if FIXE
+ [stop_sift,moyenne] = stop_sifting_fixe(t,m,INTERP,MODE_COMPLEX,ndirs);
+ elseif FIXE_H
+ [stop_sift,moyenne,stop_count] = stop_sifting_fixe_h(t,m,INTERP,stop_count,FIXE_H,MODE_COMPLEX,ndirs);
+ else
+ [stop_sift,moyenne,s] = stop_sifting(m,t,sd,sd2,tol,INTERP,MODE_COMPLEX,ndirs);
+ end
+
+ % display
+ if display_sifting && ~MODE_COMPLEX
+ NBSYM = 2;
+ [indmin,indmax] = extr(mp);
+ [tmin,tmax,mmin,mmax] = boundary_conditions(indmin,indmax,t,mp,mp,NBSYM);
+ envminp = interp1(tmin,mmin,t,INTERP);
+ envmaxp = interp1(tmax,mmax,t,INTERP);
+ envmoyp = (envminp+envmaxp)/2;
+ if FIXE || FIXE_H
+ display_emd_fixe(t,m,mp,r,envminp,envmaxp,envmoyp,nbit,k,display_sifting)
+ else
+ sxp=2*(abs(envmoyp))./(abs(envmaxp-envminp));
+ sp = mean(sxp);
+ display_emd(t,m,mp,r,envminp,envmaxp,envmoyp,s,sp,sxp,sdt,sd2t,nbit,k,display_sifting,stop_sift)
+ end
+ end
+
+ mp = m;
+ nbit=nbit+1;
+ NbIt=NbIt+1;
+
+ if(nbit==(MAXITERATIONS-1) && ~FIXE && nbit > 100)
+ if exist('s','var')
+ warning('emd:warning',['forced stop of sifting : too many iterations... mode ',int2str(k),'. stop parameter mean value : ',num2str(s)])
+ else
+ warning('emd:warning',['forced stop of sifting : too many iterations... mode ',int2str(k),'.'])
+ end
+ end
+
+ end % sifting loop
+ imf(k,:) = m;
+ if display_sifting
+ disp(['mode ',int2str(k),' stored'])
+ end
+ nbits(k) = nbit;
+ k = k+1;
+
+
+ r = r - m;
+ nbit=0;
+
+
+end %main loop
+
+if any(r) && ~any(mask)
+ imf(k,:) = r;
+end
+
+ort = io(x,imf);
+
+if display_sifting
+ close
+end
+end
+
+function stop = stop_EMD(r,MODE_COMPLEX,ndirs)
+%---------------------------------------------------------------------------------------------------
+% tests if there are enough (3) extrema to continue the decomposition
+if MODE_COMPLEX
+ for k = 1:ndirs
+ phi = (k-1)*pi/ndirs;
+ [indmin,indmax] = extr(real(exp(i*phi)*r));
+ ner(k) = length(indmin) + length(indmax);
+ end
+ stop = any(ner < 3);
+else
+ [indmin,indmax] = extr(r);
+ ner = length(indmin) + length(indmax);
+ stop = ner < 3;
+end
+end
+
+function [envmoy,nem,nzm,amp] = mean_and_amplitude(m,t,INTERP,MODE_COMPLEX,ndirs)
+%---------------------------------------------------------------------------------------------------
+% computes the mean of the envelopes and the mode amplitude estimate
+NBSYM = 2;
+if MODE_COMPLEX
+ switch MODE_COMPLEX
+ case 1
+ for k = 1:ndirs
+ phi = (k-1)*pi/ndirs;
+ y = real(exp(-i*phi)*m);
+ [indmin,indmax,indzer] = extr(y);
+ nem(k) = length(indmin)+length(indmax);
+ nzm(k) = length(indzer);
+ [tmin,tmax,zmin,zmax] = boundary_conditions(indmin,indmax,t,y,m,NBSYM);
+ envmin(k,:) = interp1(tmin,zmin,t,INTERP);
+ envmax(k,:) = interp1(tmax,zmax,t,INTERP);
+ end
+ envmoy = mean((envmin+envmax)/2,1);
+ if nargout > 3
+ amp = mean(abs(envmax-envmin),1)/2;
+ end
+ case 2
+ for k = 1:ndirs
+ phi = (k-1)*pi/ndirs;
+ y = real(exp(-i*phi)*m);
+ [indmin,indmax,indzer] = extr(y);
+ nem(k) = length(indmin)+length(indmax);
+ nzm(k) = length(indzer);
+ [tmin,tmax,zmin,zmax] = boundary_conditions(indmin,indmax,t,y,y,NBSYM);
+ envmin(k,:) = exp(i*phi)*interp1(tmin,zmin,t,INTERP);
+ envmax(k,:) = exp(i*phi)*interp1(tmax,zmax,t,INTERP);
+ end
+ envmoy = mean((envmin+envmax),1);
+ if nargout > 3
+ amp = mean(abs(envmax-envmin),1)/2;
+ end
+ end
+else
+ [indmin,indmax,indzer] = extr(m);
+ nem = length(indmin)+length(indmax);
+ nzm = length(indzer);
+ [tmin,tmax,mmin,mmax] = boundary_conditions(indmin,indmax,t,m,m,NBSYM);
+ envmin = interp1(tmin,mmin,t,INTERP);
+ envmax = interp1(tmax,mmax,t,INTERP);
+ envmoy = (envmin+envmax)/2;
+ if nargout > 3
+ amp = mean(abs(envmax-envmin),1)/2;
+ end
+end
+end
+
+function [stop,envmoy,s] = stop_sifting(m,t,sd,sd2,tol,INTERP,MODE_COMPLEX,ndirs)
+%-------------------------------------------------------------------------------
+% default stopping criterion
+try
+ [envmoy,nem,nzm,amp] = mean_and_amplitude(m,t,INTERP,MODE_COMPLEX,ndirs);
+ sx = abs(envmoy)./amp;
+ s = mean(sx);
+ stop = ~((mean(sx > sd) > tol | any(sx > sd2)) & (all(nem > 2)));
+ if ~MODE_COMPLEX
+ stop = stop && ~(abs(nzm-nem)>1);
+ end
+catch
+ stop = 1;
+ envmoy = zeros(1,length(m));
+ s = NaN;
+end
+end
+
+function [stop,moyenne]= stop_sifting_fixe(t,m,INTERP,MODE_COMPLEX,ndirs)
+%-------------------------------------------------------------------------------
+% stopping criterion corresponding to option FIX
+try
+ moyenne = mean_and_amplitude(m,t,INTERP,MODE_COMPLEX,ndirs);
+ stop = 0;
+catch
+ moyenne = zeros(1,length(m));
+ stop = 1;
+end
+end
+
+function [stop,moyenne,stop_count]= stop_sifting_fixe_h(t,m,INTERP,stop_count,FIXE_H,MODE_COMPLEX,ndirs)
+%-------------------------------------------------------------------------------
+% stopping criterion corresponding to option FIX_H
+try
+ [moyenne,nem,nzm] = mean_and_amplitude(m,t,INTERP,MODE_COMPLEX,ndirs);
+ if (all(abs(nzm-nem)>1))
+ stop = 0;
+ stop_count = 0;
+ else
+ stop_count = stop_count+1;
+ stop = (stop_count == FIXE_H);
+ end
+catch
+ moyenne = zeros(1,length(m));
+ stop = 1;
+end
+end
+
+function display_emd(t,m,mp,r,envmin,envmax,envmoy,s,sb,sx,sdt,sd2t,nbit,k,display_sifting,stop_sift)
+%-------------------------------------------------------------------------------
+% displays the progression of the decomposition with the default stopping criterion
+subplot(4,1,1)
+plot(t,mp);hold on;
+plot(t,envmax,'--k');plot(t,envmin,'--k');plot(t,envmoy,'r');
+title(['IMF ',int2str(k),'; iteration ',int2str(nbit),' before sifting']);
+set(gca,'XTick',[])
+hold off
+subplot(4,1,2)
+plot(t,sx)
+hold on
+plot(t,sdt,'--r')
+plot(t,sd2t,':k')
+title('stop parameter')
+set(gca,'XTick',[])
+hold off
+subplot(4,1,3)
+plot(t,m)
+title(['IMF ',int2str(k),'; iteration ',int2str(nbit),' after sifting']);
+set(gca,'XTick',[])
+subplot(4,1,4);
+plot(t,r-m)
+title('residue');
+disp(['stop parameter mean value : ',num2str(sb),' before sifting and ',num2str(s),' after'])
+if stop_sift
+ disp('last iteration for this mode')
+end
+if display_sifting == 2
+ pause(0.01)
+else
+ pause
+end
+end
+
+function display_emd_fixe(t,m,mp,r,envmin,envmax,envmoy,nbit,k,display_sifting)
+%---------------------------------------------------------------------------------------------------
+% displays the progression of the decomposition with the FIX and FIX_H stopping criteria
+subplot(3,1,1)
+plot(t,mp);hold on;
+plot(t,envmax,'--k');plot(t,envmin,'--k');plot(t,envmoy,'r');
+title(['IMF ',int2str(k),'; iteration ',int2str(nbit),' before sifting']);
+set(gca,'XTick',[])
+hold off
+subplot(3,1,2)
+plot(t,m)
+title(['IMF ',int2str(k),'; iteration ',int2str(nbit),' after sifting']);
+set(gca,'XTick',[])
+subplot(3,1,3);
+plot(t,r-m)
+title('residue');
+if display_sifting == 2
+ pause(0.01)
+else
+ pause
+end
+end
+
+function [tmin,tmax,zmin,zmax] = boundary_conditions(indmin,indmax,t,x,z,nbsym)
+%---------------------------------------------------------------------------------------
+% defines new extrema points to extend the interpolations at the edges of the
+% signal (mainly mirror symmetry)
+ lx = length(x);
+
+ if (length(indmin) + length(indmax) < 3)
+ error('not enough extrema')
+ end
+
+ % boundary conditions for interpolations :
+
+ if indmax(1) < indmin(1)
+ if x(1) > x(indmin(1))
+ lmax = fliplr(indmax(2:min(end,nbsym+1)));
+ lmin = fliplr(indmin(1:min(end,nbsym)));
+ lsym = indmax(1);
+ else
+ lmax = fliplr(indmax(1:min(end,nbsym)));
+ lmin = [fliplr(indmin(1:min(end,nbsym-1))),1];
+ lsym = 1;
+ end
+ else
+
+ if x(1) < x(indmax(1))
+ lmax = fliplr(indmax(1:min(end,nbsym)));
+ lmin = fliplr(indmin(2:min(end,nbsym+1)));
+ lsym = indmin(1);
+ else
+ lmax = [fliplr(indmax(1:min(end,nbsym-1))),1];
+ lmin = fliplr(indmin(1:min(end,nbsym)));
+ lsym = 1;
+ end
+ end
+
+ if indmax(end) < indmin(end)
+ if x(end) < x(indmax(end))
+ rmax = fliplr(indmax(max(end-nbsym+1,1):end));
+ rmin = fliplr(indmin(max(end-nbsym,1):end-1));
+ rsym = indmin(end);
+ else
+ rmax = [lx,fliplr(indmax(max(end-nbsym+2,1):end))];
+ rmin = fliplr(indmin(max(end-nbsym+1,1):end));
+ rsym = lx;
+ end
+ else
+ if x(end) > x(indmin(end))
+ rmax = fliplr(indmax(max(end-nbsym,1):end-1));
+ rmin = fliplr(indmin(max(end-nbsym+1,1):end));
+ rsym = indmax(end);
+ else
+ rmax = fliplr(indmax(max(end-nbsym+1,1):end));
+ rmin = [lx,fliplr(indmin(max(end-nbsym+2,1):end))];
+ rsym = lx;
+ end
+ end
+
+ tlmin = 2*t(lsym)-t(lmin);
+ tlmax = 2*t(lsym)-t(lmax);
+ trmin = 2*t(rsym)-t(rmin);
+ trmax = 2*t(rsym)-t(rmax);
+
+ % in case symmetrized parts do not extend enough
+ if tlmin(1) > t(1) || tlmax(1) > t(1)
+ if lsym == indmax(1)
+ lmax = fliplr(indmax(1:min(end,nbsym)));
+ else
+ lmin = fliplr(indmin(1:min(end,nbsym)));
+ end
+ if lsym == 1
+ error('bug')
+ end
+ lsym = 1;
+ tlmin = 2*t(lsym)-t(lmin);
+ tlmax = 2*t(lsym)-t(lmax);
+ end
+
+ if trmin(end) < t(lx) || trmax(end) < t(lx)
+ if rsym == indmax(end)
+ rmax = fliplr(indmax(max(end-nbsym+1,1):end));
+ else
+ rmin = fliplr(indmin(max(end-nbsym+1,1):end));
+ end
+ if rsym == lx
+ error('bug')
+ end
+ rsym = lx;
+ trmin = 2*t(rsym)-t(rmin);
+ trmax = 2*t(rsym)-t(rmax);
+ end
+
+ zlmax =z(lmax);
+ zlmin =z(lmin);
+ zrmax =z(rmax);
+ zrmin =z(rmin);
+
+ tmin = [tlmin t(indmin) trmin];
+ tmax = [tlmax t(indmax) trmax];
+ zmin = [zlmin z(indmin) zrmin];
+ zmax = [zlmax z(indmax) zrmax];
+end
+
+function [indmin, indmax, indzer] = extr(x,t)
+%---------------------------------------------------------------------------------------------------
+%extracts the indices of extrema
+if(nargin==1)
+ t=1:length(x);
+end
+
+m = length(x);
+
+if nargout > 2
+ x1=x(1:m-1);
+ x2=x(2:m);
+ indzer = find(x1.*x2<0);
+
+ if any(x == 0)
+ iz = find( x==0 );
+ indz = [];
+ if any(diff(iz)==1)
+ zer = x == 0;
+ dz = diff([0 zer 0]);
+ debz = find(dz == 1);
+ finz = find(dz == -1)-1;
+ indz = round((debz+finz)/2);
+ else
+ indz = iz;
+ end
+ indzer = sort([indzer indz]);
+ end
+end
+
+d = diff(x);
+
+n = length(d);
+d1 = d(1:n-1);
+d2 = d(2:n);
+indmin = find(d1.*d2<0 & d1<0)+1;
+indmax = find(d1.*d2<0 & d1>0)+1;
+
+
+% when two or more successive points have the same value we consider only one extremum in the middle of the constant area
+% (only works if the signal is uniformly sampled)
+
+if any(d==0)
+
+ imax = [];
+ imin = [];
+
+ bad = (d==0);
+ dd = diff([0 bad 0]);
+ debs = find(dd == 1);
+ fins = find(dd == -1);
+ if debs(1) == 1
+ if length(debs) > 1
+ debs = debs(2:end);
+ fins = fins(2:end);
+ else
+ debs = [];
+ fins = [];
+ end
+ end
+ if length(debs) > 0
+ if fins(end) == m
+ if length(debs) > 1
+ debs = debs(1:(end-1));
+ fins = fins(1:(end-1));
+
+ else
+ debs = [];
+ fins = [];
+ end
+ end
+ end
+ lc = length(debs);
+ if lc > 0
+ for k = 1:lc
+ if d(debs(k)-1) > 0
+ if d(fins(k)) < 0
+ imax = [imax round((fins(k)+debs(k))/2)];
+ end
+ else
+ if d(fins(k)) > 0
+ imin = [imin round((fins(k)+debs(k))/2)];
+ end
+ end
+ end
+ end
+
+ if length(imax) > 0
+ indmax = sort([indmax imax]);
+ end
+
+ if length(imin) > 0
+ indmin = sort([indmin imin]);
+ end
+
+end
+end
+
+function ort = io(x,imf)
+% ort = IO(x,imf) computes the index of orthogonality
+%
+% inputs : - x : analyzed signal
+% - imf : empirical mode decomposition
+
+n = size(imf,1);
+
+s = 0;
+
+for i = 1:n
+ for j =1:n
+ if i~=j
+ s = s + abs(sum(imf(i,:).*conj(imf(j,:)))/sum(x.^2));
+ end
+ end
+end
+
+ort = 0.5*s;
+end
+
+function [x,t,sd,sd2,tol,MODE_COMPLEX,ndirs,display_sifting,sdt,sd2t,r,imf,k,nbit,NbIt,MAXITERATIONS,FIXE,FIXE_H,MAXMODES,INTERP,mask] = init(varargin)
+
+x = varargin{1};
+if nargin == 2
+ if isstruct(varargin{2})
+ inopts = varargin{2};
+ else
+ error('when using 2 arguments the first one is the analyzed signal X and the second one is a struct object describing the options')
+ end
+elseif nargin > 2
+ try
+ inopts = struct(varargin{2:end});
+ catch
+ error('bad argument syntax')
+ end
+end
+
+% default for stopping
+defstop = [0.05,0.5,0.05];
+
+opt_fields = {'t','stop','display','maxiterations','fix','maxmodes','interp','fix_h','mask','ndirs','complex_version'};
+
+defopts.stop = defstop;
+defopts.display = 0;
+defopts.t = 1:max(size(x));
+defopts.maxiterations = 2000;
+defopts.fix = 0;
+defopts.maxmodes = 0;
+defopts.interp = 'spline';
+defopts.fix_h = 0;
+defopts.mask = 0;
+defopts.ndirs = 4;
+defopts.complex_version = 2;
+
+opts = defopts;
+
+
+
+if(nargin==1)
+ inopts = defopts;
+elseif nargin == 0
+ error('not enough arguments')
+end
+
+
+names = fieldnames(inopts);
+for nom = names'
+ if ~any(strcmpi(char(nom), opt_fields))
+ error(['bad option field name: ',char(nom)])
+ end
+ if ~isempty(eval(['inopts.',char(nom)])) % empty values are discarded
+ eval(['opts.',lower(char(nom)),' = inopts.',char(nom),';'])
+ end
+end
+
+t = opts.t;
+stop = opts.stop;
+display_sifting = opts.display;
+MAXITERATIONS = opts.maxiterations;
+FIXE = opts.fix;
+MAXMODES = opts.maxmodes;
+INTERP = opts.interp;
+FIXE_H = opts.fix_h;
+mask = opts.mask;
+ndirs = opts.ndirs;
+complex_version = opts.complex_version;
+
+if ~isvector(x)
+ error('X must have only one row or one column')
+end
+
+if size(x,1) > 1
+ x = x.';
+end
+
+if ~isvector(t)
+ error('option field T must have only one row or one column')
+end
+
+if ~isreal(t)
+ error('time instants T must be a real vector')
+end
+
+if size(t,1) > 1
+ t = t';
+end
+
+if (length(t)~=length(x))
+ error('X and option field T must have the same length')
+end
+
+if ~isvector(stop) || length(stop) > 3
+ error('option field STOP must have only one row or one column of max three elements')
+end
+
+if ~all(isfinite(x))
+ error('data elements must be finite')
+end
+
+if size(stop,1) > 1
+ stop = stop';
+end
+
+L = length(stop);
+if L < 3
+ stop(3)=defstop(3);
+end
+
+if L < 2
+ stop(2)=defstop(2);
+end
+
+
+if ~ischar(INTERP) || ~any(strcmpi(INTERP,{'linear','cubic','spline'}))
+ error('INTERP field must be ''linear'', ''cubic'', ''pchip'' or ''spline''')
+end
+
+%special procedure when a masking signal is specified
+if any(mask)
+ if ~isvector(mask) || length(mask) ~= length(x)
+ error('masking signal must have the same dimension as the analyzed signal X')
+ end
+
+ if size(mask,1) > 1
+ mask = mask.';
+ end
+ opts.mask = 0;
+ imf1 = emd(x+mask,opts);
+ imf2 = emd(x-mask,opts);
+ if size(imf1,1) ~= size(imf2,1)
+ warning('emd:warning',['the two sets of IMFs have different sizes: ',int2str(size(imf1,1)),' and ',int2str(size(imf2,1)),' IMFs.'])
+ end
+ S1 = size(imf1,1);
+ S2 = size(imf2,1);
+ if S1 ~= S2
+ if S1 < S2
+ tmp = imf1;
+ imf1 = imf2;
+ imf2 = tmp;
+ end
+ imf2(max(S1,S2),1) = 0;
+ end
+ imf = (imf1+imf2)/2;
+
+end
+
+
+sd = stop(1);
+sd2 = stop(2);
+tol = stop(3);
+
+lx = length(x);
+
+sdt = sd*ones(1,lx);
+sd2t = sd2*ones(1,lx);
+
+if FIXE
+ MAXITERATIONS = FIXE;
+ if FIXE_H
+ error('cannot use both ''FIX'' and ''FIX_H'' modes')
+ end
+end
+
+MODE_COMPLEX = ~isreal(x)*complex_version;
+if MODE_COMPLEX && complex_version ~= 1 && complex_version ~= 2
+ error('COMPLEX_VERSION parameter must equal 1 or 2')
+end
+
+
+% number of extrema and zero-crossings in residual
+ner = lx;
+nzr = lx;
+
+r = x;
+
+if ~any(mask) % if a masking signal is specified "imf" already exists at this stage
+ imf = [];
+end
+k = 1;
+
+% iterations counter for extraction of 1 mode
+nbit=0;
+
+% total iterations counter
+NbIt=0;
+end
\ No newline at end of file
diff --git a/table.m b/table.m
new file mode 100644
index 0000000..33d95d2
--- /dev/null
+++ b/table.m
@@ -0,0 +1,3683 @@
+## Copyright (C) 2019 Andrew Janke
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; If not, see .
+
+## -*- texinfo -*-
+## @deftp {Class} table
+##
+## Tabular data array containing multiple columnar variables.
+##
+## A @code{table} is a tabular data structure that collects multiple parallel
+## named variables.
+## Each variable is treated like a column. (Possibly a multi-columned column, if
+## that makes sense.)
+## The types of variables may be heterogeneous.
+##
+## A table object is like an SQL table or resultset, or a relation, or a
+## DataFrame in R or Pandas.
+##
+## A table is an array in itself: its size is @var{nrows}-by-@var{nvariables},
+## and you can index along the rows and variables by indexing into the table
+## along dimensions 1 and 2.
+##
+## A note on accessing properties of a @code{table} array: Because .-indexing is
+## used to access the variables inside the array, it can’t also be directly used
+## to access properties as well. Instead, do @code{t.Properties.} for
+## a table @code{t}. That will give you a property instead of a variable.
+## (And due to this mechanism, it will cause problems if you have a @code{table}
+## with a variable named @code{Properties}. Try to avoid that.)
+##
+## @end deftp
+##
+## @deftypeivar table @code{cellstr} VariableNames
+##
+## The names of the variables in the table, as a cellstr row vector.
+##
+## @end deftypeivar
+##
+## @deftypeivar table @code{cell} VariableValues
+##
+## A cell vector containing the values for each of the variables.
+## @code{VariableValues(i)} corresponds to @code{VariableNames(i)}.
+##
+## @end deftypeivar
+##
+## @deftypeivar table @code{cellstr} RowNames
+##
+## An optional list of row names that identify each row in the table. This
+## is a cellstr column vector, if present.
+##
+## @end deftypeivar
+
+% Developer's notes:
+% - Wherever you see the abbreviation "pk" here, that means "proxy keys", not
+% "primary keys".
+
+classdef table
+
+ properties
+ % The names of the variables (columns), as cellstr
+ VariableNames = {}
+ % The values of the variables, as a cell vector of arbitrary types
+ VariableValues = {}
+ % Optional row names, as cellstr
+ RowNames = []
+ % Dimension names
+ DimensionNames = { "Row" "Variables" }
+ end
+
+ methods
+ ## -*- texinfo -*-
+ ## @node table.table
+ ## @deftypefn {Constructor} {@var{obj} =} table ()
+ ##
+ ## Constructs a new empty (0 rows by 0 variables) table.
+ ##
+ ## @end deftypefn
+ ##
+ ## @deftypefn {Constructor} {@var{obj} =} table (@var{var1}, @var{var2}, @dots{}, @var{varN})
+ ##
+ ## Constructs a new table from the given variables. The variables passed as
+ ## inputs to this constructor become the variables of the table. Their names
+ ## are automatically detected from the input variable names that you used.
+ ##
+ ## @end deftypefn
+ ##
+ ## @deftypefn {Constructor} {@var{obj} =} table (@code{'Size'}, @var{sz}, @
+ ## @code{'VariableTypes'}, @var{varTypes})
+ ##
+ ## Constructs a new table of the given size, and with the given variable types.
+ ## The variables will contain the default value for elements of that type.
+ ##
+ ## @end deftypefn
+ ##
+ ## @deftypefn {Constructor} {@var{obj} =} table (@dots{}, @code{'VariableNames'}, @var{varNames})
+ ## @deftypefnx {Constructor} {@var{obj} =} table (@dots{}, @code{'RowNames'}, @var{rowNames})
+ ##
+ ## Specifies the variable names or row names to use in the constructed table.
+ ## Overrides the implicit names garnered from the input variable names.
+ ##
+ ## @end deftypefn
+ function this = table (varargin)
+
+ % Peel off trailing options
+ [opts, args] = peelOffNameValueOptions (varargin, {'VariableNames', 'RowNames'});
+
+ % Calling form handling
+ nargs = numel (args);
+ if nargs == 3 && isequal (args{1}, 'Backdoor')
+ % Undocumented form for internal use
+ [varNames, varVals] = args{2:3};
+ elseif nargs == 4 && isequal (args{1}, 'Size') && isequal (args{3}, 'VariableTypes')
+ error('table: This constructor form is unimplemented');
+ else
+ varVals = args;
+ if isfield (opts, 'VariableNames')
+ varNames = opts.VariableNames;
+ else
+ varNames = cell (size (args));
+ for i = 1:numel (args)
+ varNames{i} = inputname (i);
+ if isempty (varNames{i})
+ varNames{i} = sprintf('Var%d', i);
+ end
+ end
+ end
+ end
+
+ % Input validation
+ if ~iscell (varVals) || (~isvector (varVals) && ~isempty (varVals))
+ error('table: VariableValues must be a cell vector');
+ end
+ if ~iscellstr (varNames) || (~isvector (varNames) && ~isempty (varNames))
+ error('table: VariableNames must be a cellstr vector');
+ end
+ varNames = varNames(:)';
+ varVals = varVals(:)';
+ if numel (varNames) ~= numel (varVals)
+ error ('table: Inconsistent number of VariableNames (%d) and VariableValues (%d)', ...
+ numel (varNames), numel (varVals));
+ end
+ for i = 1:numel (varNames)
+ if ~isvarname (varNames{i})
+ error ('table: Invalid VariableName: ''%s''', varNames{i});
+ end
+ end
+ if ~isempty (varVals)
+ nrows = size (varVals{1}, 1);
+ for i = 2:numel (varVals)
+ if ndims (varVals{i}) > 2
+ error (['table: Variable values may not have > 2 dimensions; ' ...
+ 'input %d (%s) has %d'], i, varNames{i}, ndims (varVals{i}));
+ endif
+ nrows2 = size (varVals{i}, 1);
+ if nrows ~= nrows2
+ error ('table: Inconsistent sizes: var 1 (%s) is %d rows; var %d (%s) is %d rows', ...
+ varNames{1}, nrows, i, varNames{i}, nrows2);
+ end
+ end
+ end
+ [uqNames, ix] = unique (varNames);
+ if numel (uqNames) < numel (varNames)
+ ixBad = 1:numel (varNames);
+ ixBad(ix) = [];
+ error ('table: Duplicate VariableNames: %s', strjoin (varNames(ixBad), ', '));
+ end
+
+ % Construction
+ this.VariableNames = varNames;
+ this.VariableValues = varVals;
+ if isfield (opts, 'RowNames')
+ this.RowNames = opts.RowNames;
+ end
+ end
+
+ % Display
+
+ function display (this)
+ %DISPLAY Custom display.
+ in_name = inputname (1);
+ if ~isempty (in_name)
+ fprintf ('%s =\n', in_name);
+ end
+ disp (this);
+ end
+
+ function disp (this)
+ %DISP Custom display.
+ if isempty (this)
+ fprintf ('Empty %s %s\n', size2str (size (this)), class (this));
+ else
+ fprintf ('table: %d rows x %d variables\n', height (this), width (this));
+ fprintf (' VariableNames: %s\n', strjoin (this.VariableNames, ', '));
+ if hasrownames (this)
+ fprintf (' Has RowNames\n');
+ endif
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.summary
+ ## @deftypefn {Method} summary (@var{obj})
+ ##
+ ## Summary of table's data.
+ ##
+ ## Displays a summary of data in the input table. This will contain some
+ ## statistical information on each of its variables.
+ ##
+ ## @end deftypefn
+ function out = summary (this)
+ summary_impl (this);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.prettyprint
+ ## @deftypefn {Method} {} prettyprint (@var{obj})
+ ##
+ ## Display table's values in tabular format. This prints the contents
+ ## of the table in human-readable, tabular form.
+ ##
+ ## Variables which contain objects are displayed using the strings
+ ## returned by their @code{dispstrs} method, if they define one.
+ ##
+ ## @end deftypefn
+ function prettyprint (this)
+ %PRETTYPRINT Display table values, formatted as a table
+ if isempty (this)
+ fprintf ('Empty %s %s\n', size2str (size (this)), class (this));
+ return;
+ end
+ nVars = width (this);
+ varNames = this.VariableNames;
+ % Here, "cols" means output columns, not data columns. Each data variable
+ % will be displayed in a single output column.
+ %FIXME: This display logic might be broken for multi-column variables.
+ colNames = varNames;
+
+ nCols = nVars;
+ colNames = varNames;
+ colStrs = cell (1, nVars);
+ for iVar = 1:numel (this.VariableValues)
+ vals = this.VariableValues{iVar};
+ strs = tablevar_dispstrs (vals);
+ lines = cell (height(this), 1);
+ for iRow = 1:size (strs, 1)
+ lines{iRow} = strjoin (strs(iRow,:), ' ');
+ endfor
+ colStrs{iVar} = lines;
+ endfor
+ if hasrownames (this)
+ colStrs = [{this.RowNames(:)} colStrs];
+ colNames = [{'RowName'} colNames];
+ nCols++;
+ endif
+
+ colWidths = NaN (1, nCols);
+ for iCol = 1:nCols
+ colWidths(iCol) = max (cellfun ('numel', colStrs{iCol}));
+ endfor
+ nameWidths = cellfun ('numel', colNames);
+ colWidths = max ([nameWidths; colWidths]);
+ totalWidth = sum (colWidths) + 4 + (3 * (nCols - 1));
+ elementStrs = cat (2, colStrs{:});
+
+ rowFmts = cell (1, nCols);
+ for i = 1:nCols
+ rowFmts{i} = ['%-' num2str(colWidths(i)) 's'];
+ end
+ rowFmt = ['| ' strjoin(rowFmts, ' | ') ' |' sprintf('\n')];
+ fprintf ('%s\n', repmat ('-', [1 totalWidth]));
+ fprintf (rowFmt, colNames{:});
+ fprintf ('%s\n', repmat ('-', [1 totalWidth]));
+ for i = 1:height (this)
+ fprintf (rowFmt, elementStrs{i,:});
+ end
+ fprintf ('%s\n', repmat ('-', [1 totalWidth]));
+ end
+
+ % Type conversion
+
+ ## -*- texinfo -*-
+ ## @node table.table2cell
+ ## @deftypefn {Method} {@var{c} =} table2cell (@var{obj})
+ ##
+ ## Converts table to a cell array. Each variable in @var{obj} becomes
+ ## one or more columns in the output, depending on how many columns
+ ## that variable has.
+ ##
+ ## Returns a cell array with the same number of rows as @var{obj}, and
+ ## with as many or more columns as @var{obj} has variables.
+ ##
+ ## @end deftypefn
+ function out = table2cell (this)
+ out = cell (size (this));
+ for i = 1:width (this)
+ varVal = this.VariableValues{i};
+ if iscell (varVal)
+ if size (varVal, 2) == 1
+ out(:,i) = varVal;
+ else
+ out(:,i) = mat2cell (varVal, ones (1, size (varVal, 2)));
+ endif
+ else
+ out(:,i) = num2cell (varVal, 2);
+ endif
+ endfor
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.table2struct
+ ## @deftypefn {Method} {@var{s} =} table2struct (@var{obj})
+ ## @deftypefnx {Method} {@var{s} =} table2struct (@dots{}, @code{'ToScalar'}, @var{trueOrFalse})
+ ##
+ ## Converts @var{obj} to a scalar structure or structure array.
+ ##
+ ## Row names are not included in the output struct. To include them, you
+ ## must add them manually:
+ ## s = table2struct (tbl, 'ToScalar', true);
+ ## s.RowNames = tbl.Properties.RowNames;
+ ##
+ ## Returns a scalar struct or struct array, depending on the value of the
+ ## @code{ToScalar} option.
+ ##
+ ## @end deftypefn
+ function out = table2struct (this, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, {'ToScalar'});
+ toScalar = false;
+ if isfield (opts, 'ToScalar')
+ mustBeA (opts.ToScalar, 'logical');
+ mustBeScalar (opts.ToScalar);
+ toScalar = opts.ToScalar;
+ endif
+
+ if toScalar
+ out = struct;
+ for i = 1:width (this)
+ out.(this.VariableNames{i}) = this.VariableValues{i};
+ endfor
+ else
+ %TODO: Figure out how to implement this efficiently using cell2struct
+ s0 = struct;
+ for i = 1:width (this)
+ s0.(this.VariableNames{i}) = [];
+ endfor
+ out = repmat (s0, [height(this) 1]);
+ for iVar = 1:width (this)
+ varVal = this.VariableValues{iVar};
+ for iRow = 1:height (this)
+ if iscell (varVal) && size (varVal, 2) == 1
+ elVal = varVal{iRow};
+ else
+ elVal = varVal(iRow,:);
+ endif
+ out(iRow).(this.VariableNames{iVar}) = elVal;
+ endfor
+ endfor
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.table2array
+ ## @deftypefn {Method} {@var{s} =} table2struct (@var{obj})
+ ##
+ ## Converts @var{obj} to a homogeneous array.
+ ##
+ ## @end deftypefn
+ function out = table2array (this)
+ %TABLE2ARRAY Convert table to homogeneous array
+ if isempty (this)
+ out = [];
+ return
+ endif
+ % Wow, this is an easy implementation
+ % TODO: Doesn't work for mixed cell and non-cell variable values, because of the
+ % implicit conversion of numerics to single cells. Dunno if we want to add
+ % more graceful support for that or not.
+ out = cat (2, this.VariableValues{:});
+ endfunction
+
+ % Structural stuff
+
+ ## -*- texinfo -*-
+ ## @node table.varnames
+ ## @deftypefn {Method} {@var{out} =} varnames (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} varnames (@var{obj}, @var{varNames})
+ ##
+ ## Get or set variable names for a table.
+ ##
+ ## Returns cellstr in the getter form. Returns an updated datetime in the
+ ## setter form.
+ ##
+ ## @end deftypefn
+ function out = varnames (this, newVarNames)
+ if nargin == 1
+ out = this.VariableNames;
+ else
+ out = this;
+ out.VariableNames = newVarNames;
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.istable
+ ## @deftypefn {Method} {@var{tf} =} istable (@var{obj})
+ ##
+ ## True if input is a table.
+ ##
+ ## @end deftypefn
+ function out = istable (this)
+ %ISTABLE True if input is a table
+ out = true;
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.size
+ ## @deftypefn {Method} {@var{sz} =} size (@var{obj})
+ ##
+ ## Gets the size of a table.
+ ##
+ ## For tables, the size is [number-of-rows x number-of-variables].
+ ## This is the same as @code{[height(obj), width(obj)]}.
+ ##
+ ## @end deftypefn
+ function out = size (this, dim)
+ if nargin == 1
+ out = [height(this), width(this)];
+ else
+ sz = size (this);
+ out = sz(dim);
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.length
+ ## @deftypefn {Method} {@var{out} =} length (@var{obj})
+ ##
+ ## Length along longest dimension
+ ##
+ ## Use of @code{length} is not recommended. Use @code{numel}
+ ## or @code{size} instead.
+ ##
+ ## @end deftypefn
+ function out = length (this, varargin)
+ out = max (size (this));
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.ndims
+ ## @deftypefn {Method} {@var{out} =} ndims (@var{obj})
+ ##
+ ## Number of dimensions
+ ##
+ ## For tables, @code{ndims(obj)} is always 2.
+ ##
+ ## @end deftypefn
+ function out = ndims (this)
+ %NDIMS Number of dimensions
+ %
+ % For tables, ndims is always 2.
+ out = 2;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.squeeze
+ ## @deftypefn {Method} {@var{obj} =} squeeze (@var{obj})
+ ##
+ ## Remove singleton dimensions.
+ ##
+ ## For tables, this is always a no-op that returns the input
+ ## unmodified, because tables always have exactly 2 dimensions.
+ ##
+ ## @end deftypefn
+ function out = squeeze (this)
+ %SQUEEZE Remove singleton dimensions from this.
+ %
+ % This is always a no-op that returns the input unmodified, because tables
+ % always have exactly 2 dimensions.
+ out = this;
+ endfunction
+
+ function out = size_equal
+ %SIZE_EQUAL True if the dimensions of all arguments agree.
+ error ('table.size_equal: size_equal is not yet implemented for tables');
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.sizeof
+ ## @deftypefn {Method} {@var{out} =} sizeof (@var{obj})
+ ##
+ ## Approximate size of array in bytes. For tables, this returns the sume
+ ## of @code{sizeof} for all of its variables’ arrays, plus the size of the
+ ## VariableNames and any other metadata stored in @var{obj}.
+ ##
+ ## This is currently unimplemented.
+ ##
+ ## @end deftypefn
+ function out = sizeof (this)
+ total_size = 0;
+ total_size += sizeof(this.VariableNames);
+ for i = 1:width(this)
+ total_size += sizeof(this.VariableValues{i});
+ endfor
+ total_size += sizeof(this.RowNames);
+ out = total_size;
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.height
+ ## @deftypefn {Method} {@var{out} =} height (@var{obj})
+ ##
+ ## Number of rows in table.
+ ##
+ ## @end deftypefn
+ function out = height (this)
+ %HEIGHT Number of rows in table
+ if isempty (this.VariableValues)
+ out = 0;
+ else
+ out = size (this.VariableValues{1}, 1);
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.rows
+ ## @deftypefn {Method} {@var{out} =} rows (@var{obj})
+ ##
+ ## Number of rows in table.
+ ##
+ ## @end deftypefn
+ function out = rows (this)
+ %ROWS Number of rows in table
+ out = height (this);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.width
+ ## @deftypefn {Method} {@var{out} =} width (@var{obj})
+ ##
+ ## Number of variables in table.
+ ##
+ ## Note that this is not the sum of the number of columns in each variable.
+ ## It is just the number of variables.
+ ##
+ ## @end deftypefn
+ function out = width (this)
+ %WIDTH Number of variables in table
+ %
+ % Note that this is not the sum of the number of columns in each variable.
+ % It is just the number of variables.
+ out = numel (this.VariableNames);
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.columns
+ ## @deftypefn {Method} {@var{out} =} columns (@var{obj})
+ ##
+ ## Number of variables in table.
+ ##
+ ## Note that this is not the sum of the number of columns in each variable.
+ ## It is just the number of variables.
+ ##
+ ## @end deftypefn
+ function out = columns (this)
+ %COLUMNS Number of columns (variables) in table
+ %
+ % Note that this is not the sum of the number of columns in each variable.
+ % It is just the number of variables.
+ out = width (this);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.numel
+ ## @deftypefn {Method} {@var{out} =} numel (@var{obj})
+ ##
+ ## Total number of elements in table.
+ ##
+ ## This is the total number of elements in this table. This is calculated
+ ## as the sum of numel for each variable.
+ ##
+ ## NOTE: Those semantics may be wrong. This may actually need to be defined
+ ## as @code{height(obj) * width(obj)}. The behavior of @code{numel} may
+ ## change in the future.
+ ##
+ ## @end deftypefn
+ function out = numel (this)
+ n = 0;
+ for i = 1:numel (this.VariableValues)
+ n = n + numel (this.VariableValues{i});
+ end
+ out = n;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.isempty
+ ## @deftypefn {Method} {@var{out} =} isempty (@var{obj})
+ ##
+ ## Test whether array is empty.
+ ##
+ ## For tables, @code{isempty} is true if the number of rows is 0 or the number
+ ## of variables is 0.
+ ##
+ ## @end deftypefn
+ function out = isempty (this)
+ out = isempty (this.VariableNames);
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.ismatrix
+ ## @deftypefn {Method} {@var{out} =} ismatrix (@var{obj})
+ ##
+ ## Test whether array is a matrix.
+ ##
+ ## For tables, @code{ismatrix} is always true, by definition.
+ ##
+ ## @end deftypefn
+ function out = ismatrix (this)
+ %ISMATRIX True if input is a matrix
+ %
+ % For tables, ismatrix() is always true, by definition.
+ out = true;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.isrow
+ ## @deftypefn {Method} {@var{out} =} isrow (@var{obj})
+ ##
+ ## Test whether array is a row vector.
+ ##
+ ## @end deftypefn
+ function out = isrow (this)
+ %ISROW True if input is a row vector
+ out = height (this) == 1;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.iscol
+ ## @deftypefn {Method} {@var{out} =} iscol (@var{obj})
+ ##
+ ## Test whether array is a column vector.
+ ##
+ ## For tables, @code{iscol} is true if the input has a single variable.
+ ## The number of columns within that variable does not matter.
+ ##
+ ## @end deftypefn
+ function out = iscol (this)
+ out = width (this) == 1;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.isvector
+ ## @deftypefn {Method} {@var{out} =} isvector (@var{obj})
+ ##
+ ## Test whether array is a vector.
+ ##
+ ## @end deftypefn
+ function out = isvector (this)
+ %ISVECTOR True if input is a vector
+ out = isrow (this) || iscol (this);
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.isscalar
+ ## @deftypefn {Method} {@var{out} =} isscalar (@var{obj})
+ ##
+ ## Test whether array is scalar.
+ ##
+ ## @end deftypefn
+ function out = isscalar (this)
+ %ISSCALAR True if input is a scalar
+ out = height (this) == 1 && width (this) == 1;
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.hasrownames
+ ## @deftypefn {Method} {@var{out} =} hasrownames (@var{obj})
+ ##
+ ## True if this table has row names defined.
+ ##
+ ## @end deftypefn
+ function out = hasrownames (this)
+ %HASROWNAMES True if this table has row names defined
+ out = ~isempty (this.RowNames);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.vertcat
+ ## @deftypefn {Method} {@var{out} =} vertcat (@var{varargin})
+ ##
+ ## Vertical concatenation.
+ ##
+ ## Combines tables by vertically concatenating them.
+ ##
+ ## Inputs that are not tables are automatically converted to tables by calling
+ ## table() on them.
+ ##
+ ## The inputs must have the same number and names of variables, and their
+ ## variable value types and sizes must be cat-compatible.
+ ##
+ ## @end deftypefn
+ function out = vertcat (varargin)
+ args = varargin;
+ for i = 1:numel (args)
+ if ~istable (args{i})
+ args{i} = table (args{i});
+ end
+ end
+ mustBeAllSameVars (args{:});
+ out = args{1};
+ for i = 2:numel (args)
+ if isempty (out.RowNames)
+ if ~isempty (args{i}.RowNames)
+ error ('table.vertcat: Cannot cat tables with mixed empty and non-empty RowNames');
+ end
+ else
+ out.RowNames = [out.RowNames; args{i}.RowNames];
+ end
+ for iVar = 1:width (out)
+ out.VariableValues{iVar} = [out.VariableValues{iVar}; args{i}.VariableValues{iVar}];
+ end
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.horzcat
+ ## @deftypefn {Method} {@var{out} =} horzcat (@var{varargin})
+ ##
+ ## Horizontal concatenation.
+ ##
+ ## Combines tables by horizontally concatenating them.
+ ## Inputs that are not tables are automatically converted to tables by calling
+ ## table() on them.
+ ## Inputs must have all distinct variable names.
+ ##
+ ## Output has the same RowNames as @code{varargin@{1@}}. The variable names and values
+ ## are the result of the concatenation of the variable names and values lists
+ ## from the inputs.
+ ##
+ ## @end deftypefn
+ function out = horzcat (varargin)
+ args = varargin;
+ seen_names = args{1}.VariableNames;
+ for i = 2:numel (args)
+ dup_names = intersect (seen_names, args{i}.VariableNames);
+ if ~isempty (dup_names)
+ error ('table.horzcat: Inputs have duplicate VariableNames: %s', strjoin (dup_names, ', '));
+ end
+ seen_names = [seen_names args{i}.VariableNames];
+ end
+ % Okay, all var names are distinct. We can just concatenate.
+ varNameses = cell (size (args));
+ varValses = cell (size (args));
+ for i = 1:numel (args)
+ varNameses{i} = args{i}.VariableNames;
+ varValses{i} = args{i}.VariableValues;
+ endfor
+ out = varargin{1};
+ out.VariableNames = cat (2, varNameses{:});
+ out.VariableValues = cat (2, varValses{:});
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.repmat
+ ## @deftypefn {Method} {@var{out} =} repmat (@var{obj}, @var{sz})
+ ##
+ ## Replicate matrix.
+ ##
+ ## Repmats a table by repmatting each of its variables vertically.
+ ##
+ ## For tables, repmatting is only supported along dimension 1. That is, the
+ ## values of sz(2:end) must all be exactly 1.
+ ##
+ ## Returns a new table with the same variable names and types as tbl, but
+ ## with a possibly different row count.
+ ##
+ ## @end deftypefn
+ function out = repmat (this, sz)
+ mustBeA (this, 'table');
+ mustBeNumeric (sz);
+ if any (sz(2:end) != 1)
+ error ('table.repmat: all size elements for dim 2 and higher must be 1');
+ endif
+ out = this;
+ for i = 1:numel (this.VariableValues)
+ out.VariableValues{i} = repmat (this.VariableValues{i}, sz);
+ endfor
+ if ! isempty (this.RowNames)
+ out.RowNames = repmat (this.RowNames, sz);
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.repelem
+ ## @deftypefn {Method} {@var{out} =} repelem (@var{obj}, @var{R})
+ ## @deftypefnx {Method} {@var{out} =} repelem (@var{obj}, @var{R_1}, @var{R_2})
+ ##
+ ## Replicate elements of matrix.
+ ##
+ ## Replicates elements of this table matrix by applying repelem to each of
+ ## its variables.
+ ##
+ ## Only two dimensions are supported for @code{repelem} on tables.
+ ##
+ ## @end deftypefn
+ function out = repelem(this, varargin);
+ args = varargin;
+ if numel (args) > 2
+ error ("table.repelem: Only 2 dimensions are supported for repelem on tables");
+ endif
+ out = this;
+ for i = 1:width (this)
+ out.VariableValues{i} = repelem (this.VariableValues{i}, args{:});
+ endfor
+ endfunction
+
+ function out = subsref (this, s)
+ %SUBSREF Subscripted reference
+ %
+ % tbl.VarName
+ % tbl.VarName(ix)
+ % tbl.(VarName)
+ % tbl(ixRows, ixVars)
+ % tbl{ixRow, ixVar}
+ % tbl{ixRow, ixVar}(ix)
+ %
+ % Table supports various forms of indexing that allow you to subset the
+ % table or get at the variable values within the table.
+ chain_s = s(2:end);
+ s = s(1);
+ switch s(1).type
+ case '()'
+ if numel (s.subs) ~= 2
+ error ('table.subsref: ()-indexing of table requires exactly two arguments');
+ end
+ [ixRow, ixVar] = resolveRowVarRefs (this, s.subs{1}, s.subs{2});
+ out = this;
+ out = subsetrows (out, ixRow);
+ out = subsetvars (out, ixVar);
+ case '{}'
+ if numel (s.subs) ~= 2
+ error ('table.subsref: {}-indexing of table requires exactly two arguments');
+ end
+ [ixRow, ixVar] = resolveRowVarRefs (this, s.subs{1}, s.subs{2});
+ if numel (ixRow) ~= 1 && numel (ixVar) ~= 1
+ error('table.subsref: {}-indexing of table requires one of the inputs to be scalar');
+ end
+ %FIXME: I'm not sure how to handle the signature here yet
+ if numel (ixVar) > 1
+ error ('table.subsref: {}-indexing across multiple variables is currently unimplemented');
+ end
+ if numel (ixRow) > 1
+ error ('table.subsref: {}-indexing across multiple rows is currently unimplemented');
+ end
+ varData = this.VariableValues{ixVar};
+ out = varData(ixRow);
+ case '.'
+ name = s.subs;
+ if ~ischar (name)
+ error ('table.subsref: .-reference arguments must be char');
+ end
+ % Special cases for special properties and other attribute access
+ % TODO: should variable names or dimension names take precedence?
+ if isequal (name, 'Properties')
+ out = getProperties (this);
+ elseif isequal (name, this.DimensionNames{1})
+ out = this.RowNames;
+ elseif isequal (name, this.DimensionNames{2})
+ out = this.VariableNames;
+ else
+ out = getvar (this, name);
+ endif
+ end
+ % Chained references
+ if ~isempty (chain_s)
+ out = subsref (out, chain_s);
+ end
+ end
+
+ function out = subsasgn (this, s, val)
+ %SUBSASGN Subscripted assignment
+
+ % Chained subscripts
+ chain_s = s(2:end);
+ s = s(1);
+ if ~isempty (chain_s)
+ rhs_in = subsref (this, s);
+ rhs = subsasgn (rhs_in, chain_s, val);
+ else
+ rhs = val;
+ end
+
+ out = this;
+ switch s(1).type
+ case '()'
+ error ('table.subsasgn: Assignment using ()-indexing is not supported for table');
+ case '{}'
+ if numel (s.subs) ~= 2
+ error ('table.subsasgn: {}-indexing of table requires exactly two arguments');
+ end
+ [ixRow, ixVar] = resolveRowVarRefs (this, s.subs{1}, s.subs{2});
+ if ~isscalar (ixVar)
+ error ('table.subsasgn: {}-indexing must reference a single variable; got %d', ...
+ numel (ixVar));
+ end
+ varData = this.VariableValues{ixVar};
+ varData(ixRow) = rhs;
+ out.VariableValues{ixVar} = varData;
+ case '.'
+ if ~ischar (s.subs)
+ error ('table.subsasgn: .-index argument must be char; got a %s', ...
+ class (s.subs));
+ endif
+ if isequal (s.subs, 'Properties')
+ % Special case for this.Properties access
+ error ('table.subsasgn: .Properties access is not implemented yet');
+ else
+ out = setvar (this, s.subs, rhs);
+ endif
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.setVariableNames
+ ## @deftypefn {Method} {@var{out} =} setVariableNames (@var{obj}, @var{names})
+ ## @deftypefnx {Method} {@var{out} =} setVariableNames (@var{obj}, @var{ix}, @var{names})
+ ##
+ ## Set variable names.
+ ##
+ ## Sets the @code{VariableNames} for this table to a new list of names.
+ ##
+ ## @var{names} is a char or cellstr vector. It must have the same number of elements
+ ## as the number of variable names being assigned.
+ ##
+ ## @var{ix} is an index vector indicating which variable names to set. If
+ ## omitted, it sets all of them present in @var{obj}.
+ ##
+ ## This method exists because the @code{obj.Properties.VariableNames = @dots{}}
+ ## assignment form does not work, possibly due to an Octave bug.
+ ##
+ ## @end deftypefn
+ function this = setVariableNames (this, varargin)
+ %SETVARIABLENAMES Set VariableNames
+ narginchk (2, 3);
+ if nargin == 2
+ ix = [];
+ names = varargin{1};
+ else
+ [ix, names] = varargin{:};
+ endif
+ if ischar (names)
+ names = cellstr (names);
+ endif
+ mustBeCellstr (names);
+ if ~all (cellfun (@isvarname, names))
+ error ('table: VariableNames must be valid variable names');
+ endif
+ if isempty (ix)
+ n_assigned = width (this);
+ else
+ n_assigned = numel (ix);
+ endif
+ if numel (names) != n_assigned
+ error ('table: Dimension mismatch: assigning to %d variable names but new VariableNames is %d-long', ...
+ n_assigned, numel (names));
+ endif
+ if isempty (ix)
+ this.VariableNames = names;
+ else
+ if any (ix > width (this))
+ error ('table: index out of range during variable name assignment: %d (vs. %d variables in this)', ...
+ max (ix), width (this));
+ endif
+ this.VariableNames(ix) = names;
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.setDimensionNames
+ ## @deftypefn {Method} {@var{out} =} setDimensionNames (@var{obj}, @var{names})
+ ## @deftypefnx {Method} {@var{out} =} setDimensionNames (@var{obj}, @var{ix}, @var{names})
+ ##
+ ## Set dimension names.
+ ##
+ ## Sets the @code{DimensionNames} for this table to a new list of names.
+ ##
+ ## @var{names} is a char or cellstr vector. It must have the same number of elements
+ ## as the number of dimension names being assigned.
+ ##
+ ## @var{ix} is an index vector indicating which dimension names to set. If
+ ## omitted, it sets all two of them. Since there are always two dimension,
+ ## the indexes in @var{ix} may never be higher than 2.
+ ##
+ ## This method exists because the @code{obj.Properties.DimensionNames = @dots{}}
+ ## assignment form does not work, possibly due to an Octave bug.
+ ##
+ ## @end deftypefn
+ function this = setDimensionNames (this, varargin)
+ narginchk (2, 3);
+ if nargin == 2
+ ix = [];
+ names = varargin{1};
+ else
+ [ix, names] = varargin{:};
+ endif
+ if ischar (names)
+ names = cellstr (names);
+ endif
+ mustBeCellstr (names);
+ if ~all (cellfun (@isvarname, names))
+ error ('table.setDimensionNames: DimensionNames must be valid variable names');
+ endif
+ if isempty (ix)
+ n_assigned = 2;
+ else
+ n_assigned = numel (ix);
+ endif
+ if numel (names) != n_assigned
+ error ('table.setDimensionNames: Dimension mismatch: assigning to %d dimension names but new DimensionNames is %d-long', ...
+ n_assigned, numel (names));
+ endif
+ if isempty (ix)
+ this.DimensionNames = names;
+ else
+ if any (ix > 2)
+ error ('table.setDimensionNames: index out of range: %d (max is %d)', ...
+ max (ix), 2);
+ endif
+ this.DimensionNames(ix) = names;
+ endif
+ endfunction
+
+
+ ## -*- texinfo -*-
+ ## @node table.setRowNames
+ ## @deftypefn {Method} {@var{out} =} setRowNames (@var{obj}, @var{names})
+ ##
+ ## Set row names.
+ ##
+ ## Sets the row names on @var{obj} to @var{names}.
+ ##
+ ## @var{names} is a cellstr column vector, with the same number of rows as
+ ## @var{obj} has.
+ ##
+ ## @end deftypefn
+ function this = setRowNames (this, names)
+ %SETROWNAMES Set RowNames
+ if isempty (names)
+ this.RowNames = [];
+ return;
+ endif
+ if ~iscellstr (names)
+ error ('table: RowNames must be cellstr; got a %s', class (names));
+ endif
+ if ~isequal (size (names), [height(this), 1])
+ error ('table: Dimension mismatch: table has %d rows but new RowNames is %s', ...
+ height (this), size2str (size (names)));
+ endif
+ this.RowNames = names;
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.removevars
+ ## @deftypefn {Method} {@var{out} =} removevars (@var{obj}, @var{vars})
+ ##
+ ## Remove variables from table.
+ ##
+ ## Deletes the variables specified by @var{vars} from @var{obj}.
+ ##
+ ## @var{vars} may be a char, cellstr, numeric index vector, or logical
+ ## index vector.
+ ##
+ ## @end deftypefn
+ function out = removevars (this, vars)
+ ixVar = resolveVarRef (this, vars);
+ out = this;
+ out.VariableNames(ixVar) = [];
+ out.VariableValues(ixVar) = [];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.movevars
+ ## @deftypefn {Method} {@var{out} =} movevars (@var{obj}, @var{vars}, @var{relLocation}, @var{location})
+ ##
+ ## Move around variables in a table.
+ ##
+ ## @var{vars} is a list of variables to move, specified by name or index.
+ ##
+ ## @var{relLocation} is @code{'Before'} or @code{'After'}.
+ ##
+ ## @var{location} indicates a single variable to use as the target location,
+ ## specified by name or index. If it is specified by index, it is the index
+ ## into the list of *unmoved* variables from @var{obj}, not the original full
+ ## list of variables in @var{obj}.
+ ##
+ ## Returns a table with the same variables as @var{obj}, but in a different order.
+ ##
+ ## @end deftypefn
+ function out = movevars (this, vars, relLocation, location)
+ if ~ischar (relLocation)
+ error ('table.movevars: relLocation must be char; got %s', class (relLocation));
+ endif
+ if ~ismember (relLocation, {'Before', 'After'})
+ error ('table.movevars: relLocation must be ''Before'' or ''After''; got ''%s''', ...
+ relLocation);
+ endif
+ ixVar = resolveVarRef (this, vars);
+ ixOtherVars = 1:width (this);
+ ixOtherVars(ixVar) = [];
+ tmp = subsetvars (this, ixOtherVars);
+ moved = subsetvars (this, ixVar);
+ ixLoc = resolveVarRef (tmp, location);
+ if ~isscalar (ixLoc)
+ error ('table.movevars: location must specify a single existing variable');
+ endif
+ if isequal(relLocation, 'Before')
+ insertionIx = ixLoc;
+ else
+ insertionIx = ixLoc = 1;
+ endif
+ left = subsetvars (tmp, 1:insertionIx-1);
+ right = subsetvars (tmp, insertionIx:width (tmp));
+ out = [left moved right];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.getvar
+ ## @deftypefn {Method} {[@var{out}, @var{name}]} = getvar (@var{obj}, @var{varRef})
+ ##
+ ## Get value and name for single table variable.
+ ##
+ ## @var{varRef} is a variable reference. It may be a name or an index. It
+ ## may only specify a single table variable.
+ ##
+ ## Returns:
+ ## @var{out} – the value of the referenced table variable
+ ## @var{name} – the name of the referenced table variable
+ ##
+ ## @end deftypefn
+ function [out, name] = getvar (this, var_ref)
+ [ix_var, var_names] = resolveVarRef (this, var_ref);
+ if ! isscalar (ix_var)
+ error ('table.getvar: getvar only accepts a single variable reference; got %d', ...
+ numel (ix_var));
+ endif
+ out = this.VariableValues{ix_var};
+ name = var_names{1};
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.getvars
+ ## @deftypefn {Method} {[@var{out1}, @dots{}]} = getvars (@var{obj}, @var{varRef})
+ ##
+ ## Get values for one ore more table variables.
+ ##
+ ## @var{varRef} is a variable reference in the form of variable names or
+ ## indexes.
+ ##
+ ## Returns as many outputs as @var{varRef} referenced variables. Each output
+ ## contains the contents of the corresponding table variable.
+ ##
+ ## @end deftypefn
+ function varargout = getvars (this, name)
+ [ix_var, var_names] = resolveVarRef (this, name);
+ varargout = cell (1:numel (ix_var));
+ [varargout{:}] = this.VariableValues{ix_var};
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.setvar
+ ## @deftypefn {Method} {@var{out} =} setvar (@var{obj}, @var{varRef}, @var{value})
+ ##
+ ## Set value for a variable in table.
+ ##
+ ## This sets (adds or replaces) the value for a variable in @var{obj}. It
+ ## may be used to change the value of an existing variable, or add a new
+ ## variable.
+ ##
+ ## This method exists primarily because I cannot get @code{obj.foo = value} to work,
+ ## apparently due to an issue with Octave's subsasgn support.
+ ##
+ ## @var{varRef} is a variable reference, either the index or name of a variable.
+ ## If you are adding a new variable, it must be a name, and not an index.
+ ##
+ ## @var{value} is the value to set the variable to. If it is scalar or
+ ## a single string as charvec, it is scalar-expanded to match the number
+ ## of rows in @var{obj}.
+ ##
+ ## @end deftypefn
+ function out = setvar (this, varRef, value)
+ ixVar = resolveVarRef (this, varRef, 'lenient');
+ if ! isscalar (ixVar)
+ error('table.setvar: Only a single variable is allowed for varRef; got %d', ...
+ numel (ixVar));
+ endif
+ out = this;
+ % Scalar expansion
+ value_in = value;
+ n_rows = height (this);
+ val_is_scalar = isscalar(value) || (ischar(value) && ...
+ (size (value, 1) == 1 || isequal (size (value), [0 0])));
+ if n_rows != 1 && val_is_scalar
+ if ischar (value)
+ value = { value };
+ endif
+ value = repmat (value, [n_rows 1]);
+ endif
+ if size (value, 1) ~= height (this)
+ error ('table.setvar: Inconsistent dimensions: table is height %d, input is height %d', ...
+ height (this), size (value, 1));
+ end
+ if ixVar == 0
+ % Adding a variable
+ if ! ischar (varRef)
+ error (['table.setvar: When adding a variable, you must supply the ' ...
+ 'variable name instead of an index']);
+ endif
+ ix_new_var = width (this) + 1;
+ out.VariableNames{ix_new_var} = varRef;
+ out.VariableValues{ix_new_var} = value;
+ else
+ % Changing a variable
+ out.VariableValues{ixVar} = value;
+ endif
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.addvars
+ ## @deftypefn {Method} {@var{out} =} addvars (@var{obj}, @var{var1}, @dots{}, @var{varN})
+ ## @deftypefnx {Method} {@var{out} =} addvars (@dots{}, @code{'Before'}, @var{Before})
+ ## @deftypefnx {Method} {@var{out} =} addvars (@dots{}, @code{'After'}, @var{After})
+ ## @deftypefnx {Method} {@var{out} =} addvars (@dots{}, @
+ ## @code{'NewVariableNames'}, @var{NewVariableNames})
+ ##
+ ## Add variables to table
+ ##
+ ## Adds the specified variables to a table.
+ ##
+ ## @end deftypefn
+ function out = addvars (this, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, ...
+ {'Before', 'After', 'NewVariableNames'});
+ ix_insertion = width (this);
+ if isfield (opts, 'Before')
+ ix_insertion = resolveVarRef (this, opts.Before) - 1;
+ endif
+ if isfield (opts, 'After')
+ ix_insertion = resolveVarRef (this, opts.After);
+ endif
+
+ new_var_vals = args;
+ if isfield (opts, 'NewVariableNames')
+ new_var_names = opts.NewVariableNames;
+ if numel (new_var_names) != numel (new_var_vals)
+ error ('table.addvars: size mismatch: got %d variables but %d names', ...
+ numel (new_var_vals), numel (new_var_names));
+ endif
+ else
+ new_var_names = cell (size (args));
+ for i = 1:numel (new_var_vals)
+ new_var_names{i} = inputname (i + 1);
+ if isempty (new_var_names{i})
+ error ('table.addvars: no variable name found for input %d', i + 1);
+ endif
+ endfor
+ endif
+
+ new_tbl = table (new_var_vals{:}, 'VariableNames', new_var_names);
+ if ix_insertion == width (this)
+ out = [this new_tbl];
+ elseif ix_insertion == 0
+ out = [new_tbl this];
+ else
+ left = subsetvars (this, 1:ix_insertion);
+ right = subsetvars (this, ix_insertion+1:end);
+ out = [left new_tbl right];
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.convertvars
+ ## @deftypefn {Method} {@var{out} =} convertvars (@var{obj}, @var{vars}, @var{dataType})
+ ##
+ ## Convert variables to specified data type.
+ ##
+ ## Converts the variables in @var{obj} specified by @var{vars} to the specified data type.
+ ##
+ ## @var{vars} is a cellstr or numeric vector specifying which variables to convert.
+ ##
+ ## @var{dataType} specifies the data type to convert those variables to. It is either
+ ## a char holding the name of the data type, or a function handle which will
+ ## perform the conversion. If it is the name of the data type, there must
+ ## either be a one-arg constructor of that type which accepts the specified
+ ## variables' current types as input, or a conversion method of that name
+ ## defined on the specified variables' current type.
+ ##
+ ## Returns a table with the same variable names as @var{obj}, but with converted
+ ## types.
+ ##
+ ## @end deftypefn
+ function out = convertvars (this, vars, dataType)
+ if ~ischar (dataType) && isa (dataType, 'function_handle')
+ error ('table.convertvars: dataType must be char or function_handle; got a %s', ...
+ class (dataType));
+ endif
+ ixVars = resolveVarRef (this, vars);
+ out = this;
+ for i = 1:numel (ixVars)
+ ixVar = ixVars(i);
+ out.VariableValues{ixVar} = feval (dataType, this.VariableValues{ixVar});
+ endfor
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.mergevars
+ ## @deftypefn {Method} {@var{out} =} mergevars (@var{obj}, @var{vars})
+ ## @deftypefnx {Method} {@var{out} =} mergevars (@dots{}, @
+ ## @code{'NewVariableName'}, @var{NewVariableName})
+ ## @deftypefnx {Method} {@var{out} =} mergevars (@dots{}, @
+ ## @code{'MergeAsTable'}, @var{MergeAsTable})
+ ##
+ ## Merge table variables into a single variable.
+ ##
+ ## @end deftypefn
+ function out = mergevars (this, vars, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, ...
+ {'NewVariableName', 'MergeAsTable'});
+ if isfield (opts, 'MergeAsTable')
+ merge_as_table = opts.MergeAsTable;
+ else
+ merge_as_table = false;
+ endif
+ [ix_vars, var_names] = resolveVarRef (this, vars);
+ [ix_vars, ix_sort] = sort(ix_vars);
+ var_names = var_names(ix_sort);
+ if isfield (opts, 'NewVariableName')
+ new_var_name = opts.NewVariableName;
+ else
+ new_var_name = var_names{1};
+ endif
+
+ ix_all_vars = 1:width (this);
+ ix_vars_left = ix_all_vars;
+ ix_vars_left(ix_vars) = [];
+
+ merged_as_tbl = subsetvars (this, ix_vars);
+ leftover = subsetvars (this, ix_vars_left);
+ if merge_as_table
+ new_var_value = merged_as_tbl;
+ else
+ new_var_value = cat (2, merged_as_tbl.VariableValues{:});
+ endif
+ out = addvars (leftover, new_var_value, 'After', ix_vars(1)-1, ...
+ 'NewVariableNames', {new_var_name});
+ endfunction
+
+
+ ## -*- texinfo -*-
+ ## @node table.splitvars
+ ## @deftypefn {Method} {@var{out} =} splitvars (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} splitvars (@var{obj}, @var{vars})
+ ## @deftypefnx {Method} {@var{out} =} splitvars (@dots{}, @
+ ## @code{'NewVariableNames'}, @var{NewVariableNames})
+ ##
+ ## Split multicolumn table variables.
+ ##
+ ## Splits multicolumn table variables into new single-column variables.
+ ## If @var{vars} is supplied, splits only those variables. If @var{vars}
+ ## is not supplied, splits all multicolumn variables.
+ ##
+ ## @end deftypefn
+ function out = splitvars(this, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, {'NewVariableNames'});
+ if isfield (opts, 'NewVariableNames')
+ new_var_names = opts.NewVariableNames;
+ else
+ new_var_names = [];
+ endif
+ if isempty(args)
+ do_all_vars = true;
+ else
+ do_all_vars = false;
+ vars_to_split = args{1};
+ endif
+
+ if do_all_vars
+ vars_to_split = [];
+ for i=1:width (this)
+ if size (this.VariableValues{i}, 2) > 1
+ vars_to_split(end+1) = i;
+ endif
+ endfor
+ endif
+ [ix_vars, old_var_names] = resolveVarRef (this, vars_to_split);
+ [ix_vars, ix_sort] = sort (ix_vars);
+ old_var_names = old_var_names(ix_sort);
+ if numel (ix_vars) > 1 && ! isempty (new_var_names)
+ error ('NewVariableNames may only be specified when splitting a single variable');
+ endif
+
+ out = this;
+ for i_var = numel(ix_vars):-1:1
+ ix_var = ix_vars(i_var);
+ old_val = out.VariableValues{ix_var};
+ if isa (old_val, 'table')
+ new_var_vals = old_val.VariableValues;
+ else
+ new_var_vals = num2cell (old_val, 1);
+ endif
+ if isempty (new_var_names)
+ if istable (old_val)
+ my_new_var_names = old_val.VariableNames;
+ else
+ my_new_var_names = cell (size (new_var_vals));
+ for i_new_var = 1:numel (my_new_var_names)
+ my_new_var_names{i_new_var} = sprintf('%s_%d', old_var_names{i_var}, ...
+ i_new_var);
+ endfor
+ endif
+ else
+ my_new_var_names = new_var_names;
+ endif
+
+ out = removevars (out, ix_var);
+ out = addvars (out, new_var_vals{:}, 'NewVariableNames', my_new_var_names, ...
+ 'Before', ix_var);
+ endfor
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.stack
+ ## @deftypefn {Method} {@var{out} =} stack (@var{obj}, @var{vars})
+ ## @deftypefnx {Method} {@var{out} =} stack (@dots{}, @
+ ## @code{'NewDataVariableName'}, @var{NewDataVariableName})
+ ## @deftypefnx {Method} {@var{out} =} stack (@dots{}, @
+ ## @code{'IndexVariableName'}, @var{IndexVariableName})
+ ##
+ ## Stack multiple table variables into a single variable.
+ ##
+ ## @end deftypefn
+ function out = stack (this, varRef, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, ...
+ {'NewDataVariableName', 'IndexVariableName', 'ConstantVariables'});
+ index_var_name = [];
+ if isfield (opts, 'IndexVariableName')
+ index_var_name = opts.IndexVariableName;
+ endif
+ new_data_var_name = [];
+ if isfield (opts, 'NewDataVariableName')
+ new_data_var_name = opts.NewDataVariableName;
+ endif
+
+ [ix_vars, var_names] = resolveVarRef (this, varRef);
+ if isfield (opts, 'ConstantVariables')
+ [ix_const_vars, const_var_names] = resolveVarRef (this, opts.ConstantVariables);
+ else
+ ix_const_vars = setdiff (1:width (this), ix_vars);
+ endif
+
+ tbl = subsetvars (this, ix_const_vars);
+ n_rows_orig = height (this);
+ n_ctgs = numel (ix_vars);
+ ctg_run = categorical (var_names)';
+ tbl = repelem (tbl, n_ctgs, 1);
+
+ % Do some fancy indexing to arrange the stacked values
+ stk_vals = this.VariableValues(ix_vars);
+ stk_mat = cat(2, stk_vals{:});
+ stk_mat = stk_mat';
+ stk_var_vals = stk_mat(:);
+ index_var_vals = repmat(ctg_run, [n_rows_orig 1]);
+ if isempty (new_data_var_name)
+ new_data_var_name = strjoin(var_names, '_');
+ endif
+ if isempty (index_var_name)
+ index_var_name = [new_data_var_name '_Indicator'];
+ endif
+ stk_tbl = table(index_var_vals, stk_var_vals, ...
+ 'VariableNames', {index_var_name, new_data_var_name});
+
+ out = [tbl stk_tbl];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.head
+ ## @deftypefn {Method} {@var{out} =} head (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} head (@var{obj}, @var{k})
+ ##
+ ## Get first K rows of table.
+ ##
+ ## Returns the first @var{k} rows of @var{obj}, as a table.
+ ##
+ ## @var{k} defaults to 8.
+ ##
+ ## If there are less than @var{k} rows in @var{obj}, returns all rows.
+ ##
+ ## @end deftypefn
+ function out = head (this, k)
+ if nargin < 2 || isempty (k)
+ k = 8;
+ endif
+ nRows = height (this);
+ if nRows < k
+ out = this;
+ return;
+ endif
+ out = subsetrows (this, 1:k);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.tail
+ ## @deftypefn {Method} {@var{out} =} tail (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} tail (@var{obj}, @var{k})
+ ##
+ ## Get last K rows of table.
+ ##
+ ## Returns the last @var{k} rows of @var{obj}, as a table.
+ ##
+ ## @var{k} defaults to 8.
+ ##
+ ## If there are less than @var{k} rows in @var{obj}, returns all rows.
+ ##
+ ## @end deftypefn
+ function out = tail (this, k)
+ %TAIL Get last K rows of table
+ %
+ % out = tail (this, k)
+ %
+ % Returns the last k rows of this. K defaults to 8.
+ %
+ % If there are less than k rows in this, returns all rows.
+ if nargin < 2 || isempty (k)
+ k = 8;
+ endif
+ nRows = height (this);
+ if nRows < k
+ out = this;
+ return;
+ endif
+ out = subsetrows (this, [(nRows - (k - 1)):nRows]);
+ endfunction
+
+ function out = transpose (this)
+ out = transpose_table (this);
+ end
+
+ function out = ctranspose (this)
+ out = transpose_table (this);
+ end
+
+ % Relational operations
+
+ function [out, index] = sortrows (this, varargin)
+ %SORTROWS Sort rows of table
+ %
+ % [out, index] = sortrows (this, varargin)
+ %
+ % Sorts the rows of this based on the values in their variables.
+
+ % Parse input signature
+ % This is tricky because the sortrows() signature is complicated and ambiguous
+ args = varargin;
+ doRowNames = false;
+ direction = 'ascend';
+ varRef = ':';
+ knownOptions = {'MissingPlacement', 'ComparisonMethod'};
+ [opts, ~, optArgs] = peelOffNameValueOptions (args, knownOptions);
+ if ~isempty (args) && (ischar (args{end}) || iscellstr (args{end})) ...
+ && all (ismember (args{end}, {'ascend','descend'}))
+ direction = args{end};
+ args(end) = [];
+ end
+ if numel (args) > 1
+ error ('table.sortrows: Unrecognized options or arguments');
+ endif
+ if !isempty (args)
+ %FIXME: Determine how to identify the "rowDimName" argument
+ if ischar (args{1}) && isequal (args{1}, 'RowNames')
+ doRowNames = true;
+ else
+ varRef = args{1};
+ endif
+ args(end) = [];
+ endif
+
+ % Interpret inputs
+
+ % Do sorting
+ if doRowNames
+ % Special case: sort by row names
+ if isempty (this.rowNames)
+ error ('table.sortrows: this table does not have row names');
+ endif
+ [sortedRowNames, index] = sortrows (this.RowNames, direction, optArgs{:});
+ out = subsetrows (this, index);
+ else
+ % General case
+ ixVars = resolveVarRef (this, varRef);
+ if ischar (direction)
+ direction = cellstr (direction);
+ endif
+ if isscalar (direction)
+ directions = repmat (direction, size (ixVars));
+ else
+ directions = direction;
+ endif
+ if ~isequal (size (directions), size (ixVars))
+ error ('table.sortrows: inconsistent size between direction and vars specifier');
+ endif
+ % Perform a radix sort on the referenced variables
+ index = 1:height (this);
+ tmp = this;
+ for iStep = 1:numel(ixVars)
+ iVar = numel(ixVars) - iStep + 1;
+ ixVar = ixVars(iVar);
+ varVal = tmp.VariableValues{ixVar};
+ %Convert Matlab-style 'descend' arg to Octave-style negative column index
+ %TODO: Add support for Octave-style negative column indexes in table.sortrows input.
+ %TODO: Wrap this arg munging logic in a sortrows_matlabby() function
+ %TODO: Better error message when optArgs are is present.
+ if isequal (directions{iVar}, 'descend')
+ [~, ix] = sortrows (varVal, -1 * (1:size (varVal, 2)), optArgs{:});
+ else
+ [~, ix] = sortrows (varVal, optArgs{:});
+ endif
+ index = index(ix);
+ tmp = subsetrows (tmp, ix);
+ endfor
+ out = subsetrows (this, index);
+ endif
+ endfunction
+
+ function out = issortedrows (this, varargin)
+ %ISSORTEDROWS Determine if table rows are sorted
+ %
+ % out = issortedrows (this, varargin)
+ %
+ % TODO: Document my signature.
+ [~, ix] = sortrows (this, varargin{:});
+ out = isequal (ix, 1:height (this));
+ endfunction
+
+ function [out, ia] = topkrows (this, k, varargin)
+ %TOPKROWS Top rows in sorted order
+ %
+ % [out, ia] = topkrows (this, k, varargin)
+ %
+ % TODO: Document my signature.
+ [sorted, ix] = sortrows (this, varargin);
+ if k > height (sorted)
+ out = sorted;
+ ia = ix;
+ else
+ out = sorted(1:k,:);
+ ia = ix(1:k);
+ end
+ endfunction
+
+ function [out, ia, ic] = unique (this, varargin)
+ %UNIQUE Unique row values
+ %
+ % [out, ia, ic] = unique (this)
+ % [out, ia, ic] = unique (..., 'first'/'last')
+ % [out, ia, ic] = unique (..., 'legacy')
+ % [out, ia, ic] = unique (..., 'stable'/'sorted')
+ %
+ % The 'legacy', 'stable', and 'sorted' flags are currently unsupported
+ % because Octave's core unique() function does not support them. If you use
+ % them, you will get an error with a possibly-obsure error message.
+ %
+ % You can also pass a 'rows' flag, but it is effectively ignored, because
+ % row-wise operation is the only mode supported for unique() on tables.
+ validFlags = {'rows', 'legacy', 'stable', 'sorted', 'first', 'last'};
+ redundantFlags = {'rows'};
+ if ~iscellstr (varargin)
+ error ('table.unique: Invalid inputs: args 2 and later must be char flags');
+ endif
+ flags = varargin;
+ invalidFlags = setdiff (varargin, validFlags);
+ if ~isempty (invalidFlags)
+ error ('table.unique: Invalid flags: %s', strjoin(invalidFlags, ', '));
+ endif
+ tfRedundant = ismember (flags, redundantFlags);
+ flags(tfRedundant) = [];
+
+ pk = proxykeysForMatrixes (this);
+ [uPk, ia, ic] = unique (pk, 'rows', flags{:});
+ out = subsetrows (this, ia);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.join
+ ## @deftypefn {Method} {[@var{C}, @var{ib}] =} join (@var{A}, @var{B})
+ ## @deftypefnx {Method} {[@var{C}, @var{ib}] =} join (@var{A}, @var{B}, @dots{})
+ ##
+ ## Combine two tables by rows using key variables, in a restricted form.
+ ##
+ ## This is not a "real" relational join operation. It has the restrictions
+ ## that:
+ ## 1) The key values in B must be unique.
+ ## 2) Every key value in A must map to a key value in B.
+ ## These are restrictions inherited from the Matlab definition of table.join.
+ ##
+ ## You probably don’t want to use this method. You probably want to use
+ ## innerjoin or outerjoin instead.
+ ##
+ ## See also: @ref{table.innerjoin}, @ref{table.outerjoin}
+ ##
+ ## @end deftypefn
+ function [C, ib] = join (A, B, varargin)
+ % TODO: Implement options
+
+ % Input munging
+ optNames = {'Keys', 'KeepOneCopy', 'LeftKeys', 'RightKeys', ...
+ 'LeftVariables', 'RightVariables'};
+ opts = peelOffNameValueOptions (varargin, optNames);
+ unimplementedOptions = optNames;
+ for i = 1:numel (unimplementedOptions)
+ if isfield (opts, unimplementedOptions{i})
+ error ('table.join: Option %s is unimplemented. Sorry.', ...
+ unimplementedOptions{i});
+ endif
+ endfor
+ if !istable (A)
+ A = table (A);
+ endif
+ if !istable (B)
+ B = table (B);
+ endif
+
+ % Join logic
+ keyVarNames = intersect_stable (A.VariableNames, B.VariableNames);
+ nonKeyVarsB = setdiff_stable (B.VariableNames, keyVarNames);
+ if isempty (keyVarNames)
+ error ('table.join: Cannot join: inputs have no variable names in common');
+ endif
+ keysA = subsetvars (A, keyVarNames);
+ keysB = subsetvars (B, keyVarNames);
+ uKeysB = unique (keysB);
+ if height (uKeysB) < height (keysB)
+ error ('table.join: Non-unique keys in B');
+ endif
+ [pkA, pkB] = proxykeysForMatrixes (keysA, keysB);
+ [tf, ib] = ismember (pkA, pkB, 'rows');
+ if ~all (tf)
+ error ('table.join: Some rows in A had no corresponding key values in B');
+ endif
+ % TODO: Once we've applied the key restrictions, can we just call
+ % realjoin() here?
+ outA = A;
+ nonKeysB = subsetvars (B, nonKeyVarsB);
+ outB = subsetrows (nonKeysB, ib);
+ C = [outA outB];
+ endfunction
+
+ function out = resolveJoinKeysAndVars(A, B, opts)
+ %RESOLVEJOINKEYSANDVARS Internal implementation method
+ if isfield (opts, 'Keys')
+ if isnumeric (opts.Keys) || islogical (opts.Keys)
+ if islogical (opts.Keys)
+ keyIxA = find (opts.Keys);
+ keyIxB = find (opts.Keys);
+ else
+ keyIxA = opts.Keys;
+ keyIxB = opts.Keys;
+ endif
+ keyNamesA = A.VariableNames(keyIxA);
+ keyNamesB = B.VariableNames(keyIxB);
+ elseif ischar (opts.Keys) || iscellstr (opts.Keys)
+ keyNamesA = cellstr (opts.Keys);
+ keyNamesB = cellstr (opts.Keys);
+ [tf, keyIxA] = ismember (keyNamesA, A.VariableNames);
+ if ! all (tf)
+ error ('Named keys not found in table A: %s', strjoin (keyNamesA(!tf), ', '));
+ endif
+ [tf, keyIxB] = ismember (keyNamesB, B.VariableNames);
+ if ! all (tf)
+ error ('Named keys not found in table B: %s', strjoin (keyNamesB(!tf), ', '));
+ endif
+ endif
+ elseif isfield (opts, 'LeftKeys')
+ if ! isfield (opts, 'RightKeys')
+ error ('If option LeftKeys is supplied, then RightKeys must be, too.');
+ endif
+ if isnumeric (opts.LeftKeys) || islogical (opts.LeftKeys)
+ if islogical (opts.LeftKeys)
+ keyIxA = find (opts.LeftKeys);
+ else
+ keyIxA = opts.LeftKeys;
+ endif
+ keyNamesA = A.VariableNames(keyIxA);
+ elseif ischar (opts.LeftKeys) || iscellstr (opts.LeftKeys)
+ keyNamesA = cellstr (opts.LeftKeys);
+ [tf, keyIxA] = ismember (keyNamesA, A.VariableNames);
+ if ! all (tf)
+ error ('Named keys not found in table A: %s', strjoin (keyNamesA(!tf), ', '));
+ endif
+ endif
+ if isnumeric (opts.RightKeys) || islogical (opts.RightKeys)
+ if islogical (opts.RightKeys)
+ keyIxB = find (opts.RightKeys);
+ else
+ keyIxB = opts.RightKeys;
+ endif
+ keyNamesB = B.VariableNames(keyIxB);
+ elseif ischar (opts.RightKeys) || iscellstr (opts.RightKeys)
+ keyNamesB = cellstr (opts.RightKeys);
+ [tf, keyIxB] = ismember (keyNamesB, B.VariableNames);
+ if ! all (tf)
+ error ('Named keys not found in table B: %s', strjoin (keyNamesB(!tf), ', '));
+ endif
+ endif
+ elseif isfield (opts, 'RightKeys')
+ error ('If option RightKeys is supplied, then LeftKeys must be, too.');
+ else
+ % Default keys are a natural join
+ commonCols = intersect (A.VariableNames, B.VariableNames);
+ keyNamesA = commonCols;
+ keyNamesB = commonCols;
+ [~, keyIxA] = ismember (commonCols, A.VariableNames);
+ [~, keyIxB] = ismember (commonCols, B.VariableNames);
+ endif
+ if numel (keyIxA) != numel (keyIxB)
+ error ('Number of keys must be same for A and B; got %d vs. %s', ...
+ numel (keyIxA), numel (keyIxB));
+ endif
+
+ if isfield (opts, 'LeftVariables')
+ if isnumeric (opts.LeftVariables) || islogical (opts.LeftVariables)
+ if islogical (opts.LeftVariables)
+ varIxA = find (opts.LeftVariables);
+ else
+ varIxA = opts.LeftVariables;
+ endif
+ varNamesA = A.VariableNames(varIxA);
+ else
+ varNamesA = cellstr (opts.LeftVariables);
+ [tf, varIxA] = ismember (varNamesA, A.VariableNames);
+ if ! all (tf)
+ error ('Named variables not found in table A: %s', strjoin (varNamesA(!tf), ', '));
+ endif
+ endif
+ else
+ varIxA = 1:width(A);
+ varNamesA = A.VariableNames;
+ endif
+ if isfield (opts, 'RightVariables')
+ if isnumeric (opts.RightVariables) || islogical (opts.RightVariables)
+ if islogical (opts.RightVariables)
+ varIxB = find (opts.RightVariables);
+ else
+ varIxB = opts.RightVariables;
+ endif
+ varNamesB = B.VariableNames(varIxB);
+ else
+ varNamesB = cellstr (opts.RightVariables);
+ [tf, varIxB] = ismember (varNamesB, B.VariableNames);
+ if ! all (tf)
+ error ('Named variables not found in table B: %s', strjoin (varNamesB(!tf), ', '));
+ endif
+ endif
+ else
+ varIxB = find (! ismember (B.VariableNames, varNamesA));
+ varNamesB = B.VariableNames(varIxB);
+ endif
+
+ out.keyIxA = keyIxA;
+ out.keyIxB = keyIxB;
+ out.keyNamesA = keyNamesA;
+ out.keyNamesB = keyNamesB;
+ out.varIxA = varIxA;
+ out.varIxB = varIxB;
+ out.varNamesA = varNamesA;
+ out.varNamesB = varNamesB;
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.innerjoin
+ ## @deftypefn {Method} {[@var{out}, @var{ixa}, @var{ixb}] =} innerjoin (@var{A}, @var{B})
+ ## @deftypefnx {Method} {[@dots{}] =} innerjoin (@var{A}, @var{B}, @dots{})
+ ##
+ ## Combine two tables by rows using key variables.
+ ##
+ ## Computes the relational inner join between two tables. “Inner” means that
+ ## only rows which had matching rows in the other input are kept in the
+ ## output.
+ ##
+ ## TODO: Document options.
+ ##
+ ## Returns:
+ ## @var{out} - A table that is the result of joining A and B
+ ## @var{ix} - Indexes into A for each row in out
+ ## @var{ixb} - Indexes into B for each row in out
+ ##
+ ## @end deftypefn
+ function [out, ixa, ixb] = innerjoin(A, B, varargin)
+ % Input munging
+ optNames = {'Keys', 'LeftKeys', 'RightKeys', ...
+ 'LeftVariables', 'RightVariables'};
+ opts = peelOffNameValueOptions (varargin, optNames);
+ if ! istable (A)
+ A = table (A);
+ endif
+ if ! istable (B)
+ B = table (B);
+ endif
+
+ [out, ix] = realjoin(A, B, varargin{:});
+ ixa = ix(:,1);
+ ixb = ix(:,2);
+ endfunction
+
+ function [out, ixs] = realjoin(A, B, varargin)
+ %REALJOIN "Real" relational inner join, without key restrictions
+ %
+ % [out, ixs] = realjoin(A, B, varargin)
+ %
+ % Performs a "real" relational natural inner join between two tables,
+ % without the key restrictions that JOIN imposes.
+ %
+ % Currently does not support tables which have RowNames. This may be
+ % added in the future.
+ %
+ % This is an Octave extension.
+ %
+ % TODO: Document options.
+ %
+ % Returns:
+ % out - a table containing the result of joining A and B, with RowNames
+ % removed.
+ % ixs - an n-by-2 double matrix where ixs(i,:) is [ixA ixB], which are
+ % the indexes from A and B containing the input rows that resulted
+ % in this output row.
+
+ % Input handling
+ optNames = {'Keys', 'LeftKeys', 'RightKeys', ...
+ 'LeftVariables', 'RightVariables'};
+ opts = peelOffNameValueOptions (varargin, optNames);
+ if ! istable (A)
+ A = table (A);
+ endif
+ if ! istable (B)
+ B = table (B);
+ endif
+ opts2 = resolveJoinKeysAndVars (A, B, opts);
+ if hasrownames (A)
+ error ('table.realjoin: Input A may not have row names');
+ endif
+ if hasrownames (B)
+ error ('table.realjoin: Input B may not have row names');
+ endif
+
+ % Join logic
+ if isempty (opts2.keyIxA)
+ % This degenerates to a cartesian product
+ [out, ixs] = cartesian (A, B);
+ return
+ endif
+ keysA = subsetvars (A, opts2.keyIxA);
+ keysB = subsetvars (B, opts2.keyIxB);
+ [pkA, pkB] = proxykeysForMatrixes (keysA, keysB);
+ ixs = octave.table.internal.matchrows (pkA, pkB);
+ subA = subsetvars (A, opts2.varIxA);
+ subB = subsetvars (B, opts2.varIxB);
+ [subA, subB] = makeVarNamesUnique (subA, subB);
+ outA = subsetrows (subA, ixs(:,1));
+ outB = subsetrows (subB, ixs(:,2));
+ out = [outA outB];
+
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.outerjoin
+ ## @deftypefn {Method} {[@var{out}, @var{ixa}, @var{ixb}] =} outerjoin (@var{A}, @var{B})
+ ## @deftypefnx {Method} {[@dots{}] =} outerjoin (@var{A}, @var{B}, @dots{})
+ ##
+ ## Combine two tables by rows using key variables, retaining unmatched rows.
+ ##
+ ## Computes the relational outer join of tables A and B. This is like a
+ ## regular join, but also includes rows in each input which did not have
+ ## matching rows in the other input; the columns from the missing side are
+ ## filled in with placeholder values.
+ ##
+ ## TODO: Document options.
+ ##
+ ## Returns:
+ ## @var{out} - A table that is the result of the outer join of A and B
+ ## @var{ixa} - indexes into A for each row in out
+ ## @var{ixb} - indexes into B for each row in out
+ ##
+ ## @end deftypefn
+ function [out, ixa, ixb] = outerjoin (A, B, varargin)
+
+ % Input handling
+ if !istable (A)
+ A = table (A);
+ endif
+ if !istable (B)
+ B = table (B);
+ endif
+ optNames = {'Keys', 'LeftKeys', 'RightKeys', 'MergeKeys', ...
+ 'LeftVariables', 'RightVariables', 'Type'};
+ opts = peelOffNameValueOptions (varargin, optNames);
+ if isfield (opts, 'Type')
+ joinType = opts.Type;
+ else
+ joinType = 'full';
+ endif
+ switch joinType
+ case 'left'
+ fillLeft = false;
+ fillRight = true;
+ case 'right'
+ fillLeft = true;
+ fillRight = false;
+ case 'full'
+ fillLeft = true;
+ fillRight = true;
+ case 'inner'
+ fillLeft = true;
+ fillRight = true;
+ otherwise
+ error ('table.outerjoin: Invalid opts.Type: %s', joinType);
+ endswitch
+ opts2 = resolveJoinKeysAndVars (A, B, opts);
+ if hasrownames (A)
+ error ('table.outerjoin: Input A may not have row names');
+ endif
+ if hasrownames (B)
+ error ('table.outerjoin: Input B may not have row names');
+ endif
+
+ % Join logic
+ if isempty (opts2.keyIxA)
+ % This degenerates to a cartesian product
+ [out, ixs] = cartesian (A, B);
+ return
+ endif
+ keysA = subsetvars (A, opts2.keyIxA);
+ keysB = subsetvars (B, opts2.keyIxB);
+ [pkA, pkB] = proxykeysForMatrixes (keysA, keysB);
+ [ixs, ixUnmatchedA, ixUnmatchedB] = octave.table.internal.matchrows (pkA, pkB);
+ subA = subsetvars (A, opts2.varIxA);
+ subB = subsetvars (B, opts2.varIxB);
+ [subA, subB] = makeVarNamesUnique (subA, subB);
+ outA = subsetrows (subA, ixs(:,1));
+ outB = subsetrows (subB, ixs(:,2));
+ ixa = ixs(:,1);
+ ixb = ixs(:,2);
+ fillTables = {};
+ if fillLeft
+ fillRowB = outerfillvals (subB);
+ fillLeftTable = [subsetrows(subA, ixUnmatchedA) ...
+ repmat(fillRowB, [numel(ixUnmatchedA) 1])];
+ fillTables{end+1} = fillLeftTable;
+ ixa = [ixa; ixUnmatchedA];
+ ixB = [ixb; NaN(size(ixUnmatchedA))];
+ endif
+ if fillRight
+ fillRowA = outerfillvals (subA);
+ fillRightTable = [repmat(fillRowA, [numel(ixUnmatchedB) 1]) ...
+ subsetrows(subB, ixUnmatchedB)];
+ fillTables{end+1} = fillRightTable;
+ ixa = [ixa; NaN(size(ixUnmatchedB))];
+ ixb = [ixb; ixUnmatchedB];
+ endif
+ out = [outA outB];
+ % We have to do a loop because Octave doesn't like a plain cat() here
+ for i = 1:numel (fillTables)
+ out = [out; fillTables{i}];
+ endfor
+
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.outerfillvals
+ ## @deftypefn {Method} {@var{out} =} outerfillvals (@var{obj})
+ ##
+ ## Get fill values for outer join.
+ ##
+ ## Returns a table with the same variables as this, but containing only
+ ## a single row whose variable values are the values to use as fill values
+ ## when doing an outer join.
+ ##
+ ## @end deftypefn
+ function out = outerfillvals (this)
+ fillVals = cell (1, width (this));
+ for iCol = 1:width (this)
+ x = this.VariableValues{iCol};
+ fillVals{iCol} = tableOuterFillValue (x);
+ endfor
+ out = table (fillVals{:}, 'VariableNames', this.VariableNames);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.semijoin
+ ## @deftypefn {Method} {[@var{outA}, @var{ixA}, @var{outB}, @var{ixB}] =} semijoin @
+ ## (@var{A}, @var{B})
+ ##
+ ## Natural semijoin.
+ ##
+ ## Computes the natural semijoin of tables A and B. The semi-join of tables
+ ## A and B is the set of all rows in A which have matching rows in B, based
+ ## on comparing the values of variables with the same names.
+ ##
+ ## This method also computes the semijoin of B and A, for convenience.
+ ##
+ ## Returns:
+ ## @var{outA} - all the rows in A with matching row(s) in B
+ ## @var{ixA} - the row indexes into A which produced @var{outA}
+ ## @var{outB} - all the rows in B with matching row(s) in A
+ ## @var{ixB} - the row indexes into B which produced @var{outB}
+ ##
+ ## @end deftypefn
+ function [outA, ixA, outB, ixB] = semijoin (A, B)
+
+ % Developer note: This is almost exactly the same as antijoin, just with
+ % inverted ismember() tests. See if the implementations can be refactored
+ % together.
+
+ % Input handling
+ if !istable (A)
+ A = table (A);
+ endif
+ if !istable (B)
+ B = table (B);
+ endif
+
+ % Join logic
+ keyVarNames = intersect_stable (A.VariableNames, B.VariableNames);
+ nonKeyVarsB = setdiff_stable (B.VariableNames, keyVarNames);
+ if isempty (keyVarNames)
+ % The degenerate case of no common variable names is to keep all the rows.
+ outA = A;
+ ixA = 1:height (A);
+ outB = B;
+ ixB = 1:height (B);
+ return
+ endif
+ keysA = subsetvars (A, keyVarNames);
+ keysB = subsetvars (B, keyVarNames);
+ [pkA, pkB] = proxykeysForMatrixes (keysA, keysB);
+ ixA = find (ismember (pkA, pkB, 'rows'));
+ outA = subsetrows (A, ixA);
+ if nargout > 2
+ ixB = find (ismember (pkB, pkA, 'rows'));
+ outB = subsetrows (B, ixB);
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.antijoin
+ ## @deftypefn {Method} {[@var{outA}, @var{ixA}, @var{outB}, @var{ixB}] =} antijoin @
+ ## (@var{A}, @var{B})
+ ##
+ ## Natural antijoin (AKA “semidifference”).
+ ##
+ ## Computes the anti-join of A and B. The anti-join is defined as all the
+ ## rows from one input which do not have matching rows in the other input.
+ ##
+ ## Returns:
+ ## @var{outA} - all the rows in A with no matching row in B
+ ## @var{ixA} - the row indexes into A which produced @var{outA}
+ ## @var{outB} - all the rows in B with no matching row in A
+ ## @var{ixB} - the row indexes into B which produced @var{outB}
+ ##
+ ## @end deftypefn
+ function [outA, ixA, outB, ixB] = antijoin (A, B)
+
+ % Input handling
+ if !istable (A)
+ A = table (A);
+ endif
+ if !istable (B)
+ B = table (B);
+ endif
+
+ % Join logic
+ keyVarNames = intersect_stable (A.VariableNames, B.VariableNames);
+ nonKeyVarsB = setdiff_stable (B.VariableNames, keyVarNames);
+ if isempty (keyVarNames)
+ % The degenerate case when there are no common variable names is the empty set
+ outA = subsetrows (A, []);
+ ixA = [];
+ outB = subsetrows (B, []);
+ ixB = [];
+ return
+ endif
+ keysA = subsetvars (A, keyVarNames);
+ keysB = subsetvars (B, keyVarNames);
+ [pkA, pkB] = proxykeysForMatrixes (keysA, keysB);
+ ixA = find (!ismember (pkA, pkB, 'rows'));
+ outA = subsetrows (A, ixA);
+ if nargout > 2
+ ixB = find (!ismember (pkB, pkA, 'rows'));
+ outB = subsetrows (B, ixB);
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.cartesian
+ ## @deftypefn {Method} {[@var{out}, @var{ixs}] =} cartesian (@var{A}, @var{B})
+ ##
+ ## Cartesian product of two tables.
+ ##
+ ## Computes the Cartesian product of two tables. The Cartesian product is
+ ## each row in A combined with each row in B.
+ ##
+ ## Due to the definition and structural constraints of table, the two inputs
+ ## must have no variable names in common. It is an error if they do.
+ ##
+ ## The Cartesian product is seldom used in practice. If you find yourself
+ ## calling this method, you should step back and re-evaluate what you are
+ ## doing, asking yourself if that is really what you want to happen. If nothing
+ ## else, writing a function that calls cartesian() is usually much less
+ ## efficient than alternate ways of arriving at the same result.
+ ##
+ ## This implementation does not remove duplicate values.
+ ## TODO: Determine whether this duplicate-removing behavior is correct.
+ ##
+ ## The ordering of the rows in the output is not specified, and may be implementation-
+ ## dependent. TODO: Determine if we can lock this behavior down to a fixed,
+ ## defined ordering, without killing performance.
+ ##
+ ## @end deftypefn
+ function [out, ixs] = cartesian (A, B)
+
+ % Developer's note: The second argout, ixs, is for table's internal use,
+ % and is thus undocumented.
+
+ mustBeA (A, 'table');
+ mustBeA (B, 'table');
+
+ commonVars = intersect (A.VariableNames, B.VariableNames);
+ if ~isempty (commonVars)
+ error ('table.cartesian: Inputs have variable names in common: %s', ...
+ strjoin (commonVars, ', '));
+ endif
+
+ nRowsA = height (A);
+ nRowsB = height (B);
+ ixA = (1:nRowsA)';
+ ixB = (1:nRowsB)';
+ ixAOut = repelem (ixA, nRowsB);
+ ixBOut = repmat (ixB, [nRowsA 1]);
+ ixs = [ixAOut ixBOut];
+ outA = subsetrows (A, ixAOut);
+ outB = subsetrows (B, ixBOut);
+ out = [outA outB];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.groupby
+ ## @deftypefn {Method} {[@var{out}] =} groupby (@var{obj}, @var{groupvars}, @var{aggcalcs})
+ ##
+ ## Find groups in table data and apply functions to variables within groups.
+ ##
+ ## This works like an SQL @code{"SELECT ... GROUP BY ..."} statement.
+ ##
+ ## @var{groupvars} (cellstr, numeric) is a list of the grouping variables,
+ ## identified by name or index.
+ ##
+ ## @var{aggcalcs} is a specification of the aggregate calculations to perform
+ ## on them, in the form @code{@{}@var{out_var}@code{,} @var{fcn}@code{,} @var{in_vars}@code{; ...@}}, where:
+ ## @var{out_var} (char) is the name of the output variable
+ ## @var{fcn} (function handle) is the function to apply to produce it
+ ## @var{in_vars} (cellstr) is a list of the input variables to pass to fcn
+ ##
+ ## Returns a table.
+ ##
+ ## @end deftypefn
+ function out = groupby (this, groupvars, aggcalcs)
+ narginchk (2, 3);
+ if nargin < 3 || isempty (aggcalcs); aggcalcs = cell(0, 3); end
+ mustBeA (aggcalcs, 'cell');
+ if size (aggcalcs, 2) != 3
+ error ('table.groupby: aggcalcs must be an 3-wide cell; got %d-wide', ...
+ size (aggcalcs, 2));
+ endif
+
+ % Resolve input vars once up front for speed
+ n_aggs = size (aggcalcs, 1);
+ for i = 1:n_aggs
+ aggcalcs{i,4} = resolveVarRef (this, aggcalcs{i,3});
+ endfor
+
+ agg_outs = cell (1, n_aggs);
+ [group_id, groups_tbl] = findgroups (subsetvars (this, groupvars));
+ n_groups = size (groups_tbl, 1);
+ for i_group = 1:n_groups
+ tf_in_group = group_id == i_group;
+ for i_agg = 1:n_aggs
+ [~, fcn, in_vars, ix_in_vars] = aggcalcs{i_agg,:};
+ agg_ins = cell(1, numel (ix_in_vars));
+ for i_in_var = 1:numel (agg_ins)
+ agg_ins{i_in_var} = this.VariableValues{ix_in_vars(i_in_var)}(tf_in_group);
+ endfor
+ agg_out = fcn(agg_ins{:});
+ if i_group == 1
+ agg_outs{i_agg} = repmat(agg_out, [n_groups 1]);
+ else
+ agg_outs{i_agg}(i_group,:) = agg_out;
+ endif
+ endfor
+ endfor
+
+ agg_out_tbl = table(agg_outs{:}, 'VariableNames', aggcalcs(:,1));
+ out = [groups_tbl agg_out_tbl];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.grpstats
+ ## @deftypefn {Method} {[@var{out}] =} grpstats (@var{obj}, @var{groupvar})
+ ## @deftypefnx {Method} {[@var{out}] =} grpstats (@dots{}, @code{'DataVars'}, @var{DataVars})
+ ##
+ ## Statistics by group.
+ ##
+ ## See also: @ref{table.groupby}.
+ ##
+ ## @end deftypefn
+ function out = grpstats (this, groupvar, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, {'DataVars'});
+ if numel (args) > 1
+ error ('table.grpstats: too many inputs');
+ elseif numel (args) == 1
+ whichstats = args{1};
+ else
+ whichstats = {'mean'};
+ endif
+ if ! iscell (whichstats)
+ error ('whichstats must be a cell array');
+ endif
+ [ix_groupvar, groupvar_names] = resolveVarRef (this, groupvar);
+ if isfield (opts, 'DataVars')
+ data_vars = opts.DataVars;
+ [ix_data_vars, data_vars] = resolveVarRef (this, data_vars);
+ else
+ data_vars = setdiff (this.VariableNames, groupvar_names);
+ endif
+ aggs = cell(0, 3);
+
+ % TODO: Implement sem, gname, meanci, predci
+ stat_map = {
+ 'mean' @mean
+ 'numel' @numel
+ 'std' @std
+ 'var' @var
+ 'min' @min
+ 'max' @max
+ 'range' @range
+ };
+
+ for i_var = 1:numel (data_vars)
+ for i_stat = 1:numel (whichstats)
+ if ischar (whichstats{i_stat})
+ stat_fcn_name = whichstats{i_stat};
+ [tf,loc] = ismember (stat_fcn_name, stat_map(:,1));
+ if ! tf
+ error ('table.grpstats: unsupported stat name: %s', stat_fcn_name);
+ endif
+ stat_fcn = stat_map{loc,2};
+ elseif isa (whichstats{i_stat}, fcn_handle)
+ stat_fcn = whichstats{i_stat};
+ stat_fcn_name = func2str (stat_fcn);
+ endif
+ out_var = sprintf('%s_%s', stat_fcn_name, data_vars{i_var});
+ aggs = [aggs; { out_var, stat_fcn, data_vars{i_var} }];
+ endfor
+ endfor
+
+ out = groupby (this, groupvar, aggs);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.splitapply
+ ## @deftypefn {Method} {@var{out} =} splitapply (@var{func}, @var{obj}, @var{G})
+ ## @deftypefnx {Method} {[@var{Y1}, @dots{}, @var{YM}] =} splitapply (@var{func}, @var{obj}, @var{G})
+ ##
+ ## Split table data into groups and apply function.
+ ##
+ ## Performs a splitapply, using the variables in @var{obj} as the input X variables
+ ## to the @code{splitapply} function call.
+ ##
+ ## See also: @ref{splitapply}, @ref{table.groupby}
+ ##
+ ## @end deftypefn
+ function varargout = splitapply (func, this, G)
+ mustBeA (func, 'function_handle');
+ mustBeA (this, 'table');
+ vars = this.VariableValues;
+ out = octave.internal.splitapply_impl (func, vars{:}, G);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.rows2vars
+ ## @deftypefn {Method} {@var{out} =} rows2vars (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} rows2vars (@var{obj}, @
+ ## @code{'VariableNamesSource'}, @var{VariableNamesSource})
+ ## @deftypefnx {Method} {@var{out} =} rows2vars (@dots{}, @
+ ## @code{'DataVariables'}, @var{DataVariables})
+ ##
+ ## Reorient table, swapping rows and variables dimensions.
+ ##
+ ## This flips the dimensions of the given table @var{obj}, swapping the
+ ## orientation of the contained data, and swapping the row names/labels
+ ## and variable names.
+ ##
+ ## The variable names become a new variable named “OriginalVariableNames”.
+ ##
+ ## The row names are drawn from the column @var{VariableNamesSource} if it
+ ## is specified. Otherwise, if @var{obj} has row names, they are used.
+ ## Otherwise, new variable names in the form “VarN” are generated.
+ ##
+ ## If all the variables in @var{obj} are of the same type, they are concatenated
+ ## and then sliced to create the new variable values. Otherwise, they are
+ ## converted to cells, and the new table has cell variable values.
+ ##
+ ## @end deftypefn
+ function out = rows2vars (this, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, {'VariableNamesSource', ...
+ 'DataVariables'});
+
+ for i = 1:width (this)
+ if size (this.VariableValues{i}, 2) > 1
+ error ('table.rows2vars: Table variables may not have more than 1 column');
+ elseif istable (this.VariableValues{i})
+ error ('table.rows2vars: Nested tables are not supported');
+ endif
+ endfor
+
+ if isfield (opts, 'VariableNamesSource')
+ new_var_names = getvar (this, opts.VariableNamesSource);
+ tbl = removevars (this, opts.VariableNamesSource);
+ elseif hasrownames (this)
+ new_var_names = this.RowNames';
+ tbl = this;
+ else
+ new_var_names = cell (1, height (this));
+ for i = 1:height (this)
+ new_var_names{i} = sprintf ("Var%d", i);
+ endfor
+ tbl = this;
+ endif
+ if isfield (opts, 'DataVariables')
+ tbl = subsetvars (tbl, opts.DataVariables);
+ endif
+ OriginalVariableNames = tbl.VariableNames(:);
+ tbl_original_var_names = table (OriginalVariableNames);
+
+ col_types = cellfun (@(x) class(x), tbl.VariableValues);
+ u_col_types = unique (col_types);
+ if isscalar (u_col_types)
+ matrix = cat (2, tbl.VariableValues{:});
+ else
+ cols_as_cells = cell (1, width (tbl));
+ for i = 1:width (tbl)
+ if iscell (tbl.VariableValues{i})
+ cols_as_cells{i} = tbl.VariableValues{i};
+ else
+ cols_as_cells{i} = num2cell (tbl.VariableValues{i});
+ endif
+ endfor
+ matrix = cat (2, cols_as_cells{:});
+ endif
+
+ matrix = matrix';
+ new_var_values = num2cell (matrix, 1);
+ out = table(new_var_values{:}, ...
+ 'VariableNames', new_var_names);
+ out = [tbl_original_var_names out];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.congruentize
+ ## @deftypefn {Method} {[@var{outA}, @var{outB}] =} congruentize (@var{A}, @var{B})
+ ##
+ ## Make tables congruent.
+ ##
+ ## Makes tables congruent by ensuring they have the same variables of the
+ ## same types in the same order. Congruent tables may be safely unioned,
+ ## intersected, vertcatted, or have other set operations done to them.
+ ##
+ ## Variable names present in one input but not in the other produces an error.
+ ## Variables with the same name but different types in the inputs produces
+ ## an error.
+ ## Inputs must either both have row names or both not have row names; it is
+ ## an error if one has row names and the other doesn't.
+ ## Variables in different orders are reordered to be in the same order as A.
+ ##
+ ## @end deftypefn
+ function [outA, outB] = congruentize (A, B)
+
+ if !istable (A)
+ A = table (A);
+ endif
+ if !istable (B)
+ B = table (B);
+ endif
+ if hasrownames (A) && !hasrownames (B)
+ error ('table.congruentize: Input A has row names but input B does not');
+ endif
+ if !hasrownames (A) && hasrownames (B)
+ error ('table.congruentize: Input B has row names but input A does not');
+ endif
+ varsOnlyInA = setdiff (A.VariableNames, B.VariableNames);
+ if !isempty (varsOnlyInA)
+ error ('table.congruentize: Input A has variables not present in B: %s', ...
+ strjoin (varsOnlyInA, ', '));
+ endif
+ varsOnlyInB = setdiff (B.VariableNames, A.VariableNames);
+ if !isempty (varsOnlyInB)
+ error ('table.congruentize: Input B has variables not present in A: %s', ...
+ strjoin (varsOnlyInB, ', '));
+ endif
+
+ outA = A;
+ outB = B;
+ [~,loc] = ismember (A.VariableNames, outB.VariableNames);
+ outB = subsetvars (outB, loc);
+
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.union
+ ## @deftypefn {Method} {[@var{C}, @var{ia}, @var{ib}] =} union (@var{A}, @var{B})
+ ##
+ ## Set union.
+ ##
+ ## Computes the union of two tables. The union is defined to be the unique
+ ## row values which are present in either of the two input tables.
+ ##
+ ## Returns:
+ ## @var{C} - A table containing all the unique row values present in A or B.
+ ## @var{ia} - Row indexes into A of the rows from A included in C.
+ ## @var{ib} - Row indexes into B of the rows from B included in C.
+ ##
+ ## @end deftypefn
+ function [C, ia, ib] = union (A, B)
+ % Input handling
+ [A, B] = congruentize (A, B);
+
+ % Set logic
+ [pkA, pkB] = proxykeysForMatrixes (A, B);
+ [~, ia, ib] = union (pkA, pkB, 'rows');
+ C = [subsetrows(A, ia); subsetrows(B, ib)];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.intersect
+ ## @deftypefn {Method} {[@var{C}, @var{ia}, @var{ib}] =} intersect (@var{A}, @var{B})
+ ##
+ ## Set intersection.
+ ##
+ ## Computes the intersection of two tables. The intersection is defined to be the unique
+ ## row values which are present in both of the two input tables.
+ ##
+ ## Returns:
+ ## @var{C} - A table containing all the unique row values present in both A and B.
+ ## @var{ia} - Row indexes into A of the rows from A included in C.
+ ## @var{ib} - Row indexes into B of the rows from B included in C.
+ ##
+ ## @end deftypefn
+ function [C, ia, ib] = intersect (A, B)
+ % Input handling
+ [A, B] = congruentize (A, B);
+
+ % Set logic
+ [pkA, pkB] = proxykeysForMatrixes (A, B);
+ [~, ia, ib] = intersect (pkA, pkB, 'rows');
+ C = [subsetrows(A, ia); subsetrows(B, ib)];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.setxor
+ ## @deftypefn {Method} {[@var{C}, @var{ia}, @var{ib}] =} setxor (@var{A}, @var{B})
+ ##
+ ## Set exclusive OR.
+ ##
+ ## Computes the setwise exclusive OR of two tables. The set XOR is defined to be
+ ## the unique row values which are present in one or the other of the two input
+ ## tables, but not in both.
+ ##
+ ## Returns:
+ ## @var{C} - A table containing all the unique row values in the set XOR of A and B.
+ ## @var{ia} - Row indexes into A of the rows from A included in C.
+ ## @var{ib} - Row indexes into B of the rows from B included in C.
+ ##
+ ## @end deftypefn
+ function [C, ia, ib] = setxor (A, B)
+ % Input handling
+ [A, B] = congruentize (A, B);
+
+ % Set logic
+ [pkA, pkB] = proxykeysForMatrixes (A, B);
+ [~, ia, ib] = setxor (pkA, pkB, 'rows');
+ C = [subsetrows(A, ia); subsetrows(B, ib)];
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.setdiff
+ ## @deftypefn {Method} {[@var{C}, @var{ia}] =} setdiff (@var{A}, @var{B})
+ ##
+ ## Set difference.
+ ##
+ ## Computes the set difference of two tables. The set difference is defined to be
+ ## the unique row values which are present in table A that are not in table B.
+ ##
+ ## Returns:
+ ## @var{C} - A table containing the unique row values in A that were not in B.
+ ## @var{ia} - Row indexes into A of the rows from A included in C.
+ ##
+ ## @end deftypefn
+ function [C, ia] = setdiff (A, B)
+ % Input handling
+ [A, B] = congruentize (A, B);
+
+ % Set logic
+ [pkA, pkB] = proxykeysForMatrixes (A, B);
+ [~, ia] = setdiff (pkA, pkB, 'rows');
+ C = subsetrows (A, ia);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.ismember
+ ## @deftypefn {Method} {[@var{tf}, @var{loc}] =} ismember (@var{A}, @var{B})
+ ##
+ ## Set membership.
+ ##
+ ## Finds rows in A that are members of B.
+ ##
+ ## Returns:
+ ## @var{tf} - A logical vector indicating whether each A(i,:) was present in B.
+ ## @var{loc} - Indexes into B of rows that were found.
+ ##
+ ## @end deftypefn
+ function [tf, loc] = ismember (A, B)
+ % Input handling
+ [A, B] = congruentize (A, B);
+
+ % Set logic
+ [pkA, pkB] = proxykeysForMatrixes (A, B);
+ [tf, loc] = ismember (pkA, pkB, 'rows');
+ endfunction
+
+ % Missing values
+
+ ## -*- texinfo -*-
+ ## @node table.ismissing
+ ## @deftypefn {Method} {@var{out} =} ismissing (@var{obj})
+ ## @deftypefnx {Method} {@var{out} =} ismissing (@var{obj}, @var{indicator})
+ ##
+ ## Find missing values.
+ ##
+ ## Finds missing values in @var{obj}’s variables.
+ ##
+ ## If indicator is not supplied, uses the standard missing values for each
+ ## variable’s data type. If indicator is supplied, the same indicator list is
+ ## applied across all variables.
+ ##
+ ## All variables in this must be vectors. (This is due to the requirement
+ ## that @code{size(out) == size(obj)}.)
+ ##
+ ## Returns a logical array the same size as @var{obj}.
+ ##
+ ## @end deftypefn
+ function out = ismissing (this, indicator)
+ mustBeA (this, 'table');
+ hasIndicator = false;
+ if nargin > 1
+ hasIndicator = true;
+ endif
+ mustBeAllColVectorVars (this);
+ out = false (size (this));
+ for i = 1:width (this)
+ if hasIndicator
+ out(:,i) = ismissing (this.VariableValues{i}, indicator);
+ else
+ out(:,i) = ismissing (this.VariableValues{i});
+ endif
+ endfor
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.rmmissing
+ ## @deftypefn {Method} {[@var{out}, @var{tf}] =} rmmissing (@var{obj})
+ ## @deftypefnx {Method} {[@var{out}, @var{tf}] =} rmmissing (@var{obj}, @var{indicator})
+ ## @deftypefnx {Method} {[@var{out}, @var{tf}] =} rmmissing (@dots{}, @code{'DataVariables'}, @var{vars})
+ ## @deftypefnx {Method} {[@var{out}, @var{tf}] =} rmmissing (@dots{}, @code{'MinNumMissing'}, @var{minNumMissing})
+ ##
+ ## Remove rows with missing values.
+ ##
+ ## Removes the rows from @var{obj} that have missing values.
+ ##
+ ## If the 'DataVariables' option is given, only the data in the specified
+ ## variables is considered.
+ ##
+ ## Returns:
+ ## @var{out} - A table the same as @var{obj}, but with rows with missing values removed.
+ ## @var{tf} - A logical index vector indicating which rows were removed.
+ ##
+ ## @end deftypefn
+ function [out, tf] = rmmissing (this, varargin)
+ [opts, args] = peelOffNameValueOptions (varargin, {'DataVariables','MinNumMissing'});
+ hasIndicator = false;
+ if numel (args) > 1
+ error ('table.rmmissing: Too many arguments.');
+ elseif numel (args) == 1
+ hasIndicator = true;
+ indicator = args{1};
+ endif
+ minNumMissing = 1;
+ if isfield (opts, 'MinNumMissing')
+ minNumMissing = opts.MinNumMissing;
+ endif
+
+ if isfield (opts, 'DataVariables')
+ dataVarsSelector = opts.DataVariables;
+ if isa (dataVarsSelector, 'function_handle')
+ error ('table.rmmissing: function handle DataVariables option is not implemented.');
+ else
+ ixDataVars = resolveVarRef (dataVarsSelector);
+ endif
+ else
+ ixDataVars = 1:width (this);
+ endif
+
+ dataVals = subsetvars (this, ixDataVars);
+ if hasIndicator
+ tfMissing = ismissing (dataVals, indicator);
+ else
+ tfMissing = ismissing (dataVals);
+ endif
+ nMissing = sum (tfMissing, 2);
+ tfRowHasMissing = nMissing >= minNumMissing;
+ out = subsetrows (this, ~tfRowHasMissing);
+ tf = tfRowHasMissing;
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.standardizeMissing
+ ## @deftypefn {Method} {@var{out} =} standardizeMissing (@var{obj}, @var{indicator})
+ ## @deftypefnx {Method} {@var{out} =} standardizeMissing (@dots{}, @code{'DataVariables'}, @var{vars})
+ ##
+ ## Insert standard missing values.
+ ##
+ ## Standardizes missing values in variable data.
+ ##
+ ## If the @var{DataVariables} option is supplied, only the indicated variables are
+ ## standardized.
+ ##
+ ## @var{indicator} is passed along to @code{standardizeMissing} when it is called on each
+ ## of the data variables in turn. The same indicator is used for all
+ ## variables. You can mix and match indicator types by just passing in
+ ## mixed indicator types in a cell array; indicators that don't match the
+ ## type of the column they are operating on are just ignored.
+ ##
+ ## Returns a table with same variable names and types as @var{obj}, but with variable
+ ## values standardized.
+ ##
+ ## @end deftypefn
+ function out = standardizeMissing (this, indicator, varargin)
+ narginchk (2, 4);
+ mustBeA (this, 'table');
+ [opts, args] = peelOffNameValueOptions (varargin, {'DataVariables'});
+ if ~isempty (args)
+ error ('table.standardizeMissing: Too many input arguments');
+ endif
+
+ if isfield (opts, 'DataVariables')
+ dataVarsSelector = opts.DataVariables;
+ if isa (dataVarsSelector, 'function_handle')
+ error ('table.standardizeMissing: function handle DataVariables option is not implemented.');
+ else
+ ixDataVars = resolveVarRef (dataVarsSelector);
+ endif
+ else
+ ixDataVars = 1:width (this);
+ endif
+
+ out = this;
+ for i = 1:numel (ixDataVars)
+ ixDataVar = ixDataVars(i);
+ val = this.VariableValues{ixDataVar};
+ standardized = standardizeMissing (val, indicator);
+ out.VariableValues{ixDataVar} = standardized;
+ endfor
+
+ endfunction
+
+ % Function application
+
+ ## -*- texinfo -*-
+ ## @node table.varfun
+ ## @deftypefn {Method} {@var{out} =} varfun (@var{fcn}, @var{obj})
+ ## @deftypefnx {Method} {@var{out} =} varfun (@dots{}, @code{'OutputFormat'}, @var{outputFormat})
+ ## @deftypefnx {Method} {@var{out} =} varfun (@dots{}, @code{'InputVariables'}, @var{vars})
+ ## @deftypefnx {Method} {@var{out} =} varfun (@dots{}, @code{'ErrorHandler'}, @var{errorFcn})
+ ##
+ ## Apply function to table variables.
+ ##
+ ## Applies the given function @var{fcn} to each variable in @var{obj},
+ ## collecting the output in a table, cell array, or array of another type.
+ ##
+ ## @end deftypefn
+ function out = varfun (func, A, varargin)
+ mustBeA (A, 'table');
+ validOptions = {'InputVariables', 'GroupingVariables', 'OutputFormat', 'ErrorHandler'};
+ [opts, args] = peelOffNameValueOptions (varargin, validOptions);
+ unimplementedOptions = {'GroupingVariables', 'ErrorHandler'};
+ for i = 1:numel (unimplementedOptions)
+ if isfield (opts, unimplementedOptions{i})
+ error ('table.varfun: Option %s is not yet implemented.', unimplementedOptions{i});
+ endif
+ endfor
+ if ~isa (func, 'function_handle')
+ error ('table.varfun: func must be a function handle; got a %s', class (func));
+ endif
+ outputFormat = 'table';
+ if isfield (opts, 'OutputFormat')
+ outputFormat = opts.OutputFormat;
+ endif
+ validOutputFormats = {'table','uniform','cell'};
+ if ~ismember (outputFormat, validOutputFormats);
+ error ('table.varfun: Invalid OutputFormat: %s. Must be one of: %s', ...
+ outputFormat, strjoin (validOutputFormats, ', '));
+ endif
+ errorHandler = [];
+ if isfield (opts, 'ErrorHandler')
+ if ~isa (opts.ErrorHandler, 'function_handle')
+ error ('table.varfun: ErrorHandler must be a function handle; got a %s', ...
+ class (opts.ErrorHandler));
+ endif
+ errorHandler = opts.ErrorHandler;
+ endif
+
+ tbl = A;
+ if isfield (opts, 'InputVariables')
+ ixInVars = resolveVarRef (tbl, opts.InputVariables);
+ else
+ ixInVars = 1:width (tbl);
+ endif
+
+ outVals = cell (1, numel (ixInVars));
+ for i = 1:numel (ixInVars)
+ ixInVar = ixInVars(i);
+ varVal = tbl.VariableValues{ixInVar};
+ if isempty (errorHandler)
+ fcnOut = feval (func, varVal);
+ else
+ try
+ fcnOut = feval (func, varVal);
+ catch err
+ fcnOut = feval (errorHandler, varVal);
+ end_try_catch
+ endif
+ outVals{i} = fcnOut;
+ endfor
+
+ switch outputFormat
+ case 'cell'
+ out = outVals;
+ case 'table'
+ out = table (outVals{:}, 'VariableNames', tbl.VariableNames);
+ case 'uniform'
+ tfScalar = cellfun('isscalar', outVals);
+ if ~all (tfScalar)
+ ixFirstBad = find(~tfScalar, 1);
+ error (['table.varfun: For OutputFormat ''uniform'', all function ' ...
+ 'outputs must be scalar; output %d was %d long'], ...
+ ixFirstBad, numel (outVals{ixFirstBad}));
+ endif
+ outClasses = cellfun ('class', outVals, 'UniformOutput', false);
+ uOutClasses = unique (outClasses);
+ if numel (uOutClasses) > 1
+ error (['table.varfun: For OutputFormat ''uniform'', all function ' ...
+ 'outputs must be the same type; got a mix of: %s'], ...
+ strjoin (uOutClasses, ', '));
+ endif
+ out = cat (2, outVals{:});
+ endswitch
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.rowfun
+ ## @deftypefn {Method} {@var{out} =} varfun (@var{func}, @var{obj})
+ ## @deftypefnx {Method} {@var{out} =} varfun (@dots{}, @code{'OptionName'}, @var{OptionValue}, @dots{})
+ ##
+ ## Apply function to rows in table and collect outputs.
+ ##
+ ## This applies the function @var{func} to the elements of each row of
+ ## @var{obj}’s variables, and collects the concatenated output(s) into the
+ ## variable(s) of a new table.
+ ##
+ ## @var{func} is a function handle. It should take as many inputs as there
+ ## are variables in @var{obj}. Or, it can take a single input, and you must
+ ## specify @code{'SeparateInputs', false} to have the input variables
+ ## concatenated before being passed to @var{func}. It may return multiple
+ ## argouts, but to capture those past the first one, you must explicitly
+ ## specify the @code{'NumOutputs'} or @code{'OutputVariableNames'} options.
+ ##
+ ## Supported name/value options:
+ ## @table @code
+ ## @item 'OutputVariableNames'
+ ## Names of table variables to store combined function output arguments in.
+ ## @item 'NumOutputs'
+ ## Number of output arguments to call function with. If omitted, defaults to
+ ## number of items in @var{OutputVariableNames} if it is supplied, otherwise
+ ## defaults to 1.
+ ## @item 'SeparateInputs'
+ ## If true, input variables are passed as separate input arguments to @var{func}.
+ ## If false, they are concatenated together into a row vector and passed as
+ ## a single argument. Defaults to true.
+ ## @item 'ErrorHandler'
+ ## A function to call as a fallback when calling @var{func} results in an error.
+ ## It is passed the caught exception, along with the original inputs passed
+ ## to @var{func}, and it has a “second chance” to compute replacement values
+ ## for that row. This is useful for converting raised errors to missing-value
+ ## fill values, or logging warnings.
+ ## @item 'ExtractCellContents'
+ ## Whether to “pop out” the contents of the elements of cell variables in
+ ## @var{obj}, or to leave them as cells. True/false; default is false. If
+ ## you specify this option, then @var{obj} may not have any multi-column
+ ## cell-valued variables.
+ ## @item 'InputVariables'
+ ## If specified, only these variables from @var{obj} are used as the function
+ ## inputs, instead of using all variables.
+ ## @item 'GroupingVariables'
+ ## Not yet implemented.
+ ## @item 'OutputFormat'
+ ## The format of the output. May be @code{'table'} (the default),
+ ## @code{'uniform'}, or @code{'cell'}. If it is @code{'uniform'} or @code{'cell'},
+ ## the output variables are returned in multiple output arguments from
+ ## @code{'rowfun'}.
+ ## @end table
+ ##
+ ## Returns a @code{table} whose variables are the collected output arguments
+ ## of @var{func} if @var{OutputFormat} is @code{'table'}. Otherwise, returns
+ ## multiple output arguments of whatever type @var{func} returned (if
+ ## @var{OutputFormat} is @code{'uniform'}) or cells (if @var{OutputFormat}
+ ## is @code{'cell'}).
+ ##
+ ## @end deftypefn
+ function varargout = rowfun (func, A, varargin)
+ % TODO: Document all the Name/Value options; there's a bunch of them.
+ % TODO: Implement the remaining options.
+
+ % Input handling
+ mustBeA (func, 'function_handle');
+ mustBeA (A, 'table');
+ validOptions = {'InputVariables', 'GroupingVariables', 'OutputFormat', ...
+ 'SeparateInputs', 'ExtractCellContents', 'OutputVariableNames', ...
+ 'NumOutputs', 'ErrorHandler'};
+ [opts, args] = peelOffNameValueOptions (varargin, validOptions);
+ unimplementedOptions = {'GroupingVariables'};
+ for i = 1:numel (unimplementedOptions)
+ if isfield (opts, unimplementedOptions{i})
+ error ('table.rowfun: Option %s is not yet implemented.', unimplementedOptions{i});
+ endif
+ endfor
+ if ~isa (func, 'function_handle')
+ error ('table.rowfun: func must be a function handle; got a %s', class (func));
+ endif
+ output_format = 'table';
+ if isfield (opts, 'OutputFormat')
+ output_format = opts.OutputFormat;
+ endif
+ validOutputFormats = {'table','uniform','cell'};
+ if ~ismember (outputFormat, validOutputFormats);
+ error ('table.rowfun: Invalid OutputFormat: %s. Must be one of: %s', ...
+ outputFormat, strjoin (validOutputFormats, ', '));
+ endif
+ errorHandler = [];
+ if isfield (opts, 'ErrorHandler')
+ if ~isa (opts.ErrorHandler, 'function_handle')
+ error ('table.rowfun: ErrorHandler must be a function handle; got a %s', ...
+ class (opts.ErrorHandler));
+ endif
+ errorHandler = opts.ErrorHandler;
+ endif
+ tbl = A;
+ n_out_args = [];
+ if isfield (opts, 'NumOutputs')
+ mustBeScalar (opts.NumOutputs, 'opts.NumOutputs');
+ mustBeNumeric (opts.NumOutputs, 'opts.NumOutputs');
+ n_out_args = opts.NumOutputs;
+ endif
+ if isfield (opts, 'OutputVariableNames')
+ out_var_names = opts.OutputVariableNames;
+ n_out_args = numel (out_var_names);
+ mustBeCellstr (out_var_names, 'opts.OutputVariableNames');
+ else
+ if isempty (n_out_args)
+ n_out_args = 1;
+ endif
+ out_var_names = cell (1, n_out_args);
+ for i = 1:n_out_args
+ out_var_names = sprintf ("out%d", i);
+ endfor
+ endif
+ do_separate_inputs = true;
+ if isfield (opts, 'SeparateInputs')
+ do_separate_inputs = opts.SeparateInputs;
+ endif
+ do_extract_cell_contents = false;
+ if isfield (opts, 'ExtractCellContents')
+ mustBeScalarLogical (opts.ExtractCellContents, 'opts.ExtractCellContents');
+ do_extract_cell_contents = opts.ExtractCellContents;
+ endif
+ if do_extract_cell_contents
+ for i = 1:width (this)
+ if iscell (this.VariableValues{i}) && size (this.VariableValues{i}, 2) > 1
+ error (['table.rowfun: Calling rowfun on a table with multi-column cell ' ...
+ 'variables is not supported when ''ExtractCellContents'' is true' ...
+ '(bad variable: %s)'], this.VariableNames{i});
+ endif
+ endfor
+ endif
+ if isfield (opts, 'InputVariables')
+ this = subsetvars (this, opts.InputVariables);
+ endif
+
+ % Function application
+ vars = this.VariableValues;
+ n_vars = numel (vars);
+ out_bufs = repmat ({cell(height(this), 1)}, [1 n_out_args]);
+ for i_row = 1:height (this)
+ args = cell (1, n_vars);
+ for i_var = 1:n_vars
+ if do_extract_cell_contents && iscell (vars{i_var})
+ args{i_var} = vars{i_var}{i_row};
+ else
+ args{i_var} = vars{i_var}(i_row,:);
+ endif
+ endfor
+ argouts = cell (1, n_out_args);
+ if do_separate_inputs
+ if isempty (errorHandler)
+ [argouts{:}] = func (args{:});
+ else
+ try
+ [argouts{:}] = func (args{:});
+ catch err
+ [argouts{:}] = errorHandler (err, args{:});
+ end_try_catch
+ endif
+ else
+ single_input = [args{:}];
+ if isempty (errorHandler)
+ [argouts{:}] = func (single_input);
+ else
+ try
+ [argouts{:}] = func (single_input);
+ catch err
+ [argouts{:}] = errorHandler (err, single_input);
+ end_try_catch
+ endif
+ endif
+ for i_out_arg = 1:n_out_args
+ out_bufs{i_out_arg}{i_row} = argouts{i_out_arg};
+ endfor
+ endfor
+
+ % Output packaging
+ switch output_format
+ case 'table'
+ out_vars = cellfun(@(c) cat(1, c{:}), out_bufs);
+ out = table (out_vars{:}, 'VariableNames', out_var_names);
+ varargout = { out };
+ case 'cell'
+ varargout = out_vars;
+ case 'uniform'
+ varargout = cellfun(@(c) cat(1, c{:}), out_bufs);
+ case 'timetable'
+ error ('timetable is not yet implemented. Sorry.');
+ otherwise
+ error ('table.rowfun: Invalid OutputFormat: ''%s''', output_format);
+ endswitch
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.findgroups
+ ## @deftypefn {Method} {[@var{G}, @var{TID}] =} findgroups (@var{obj})
+ ##
+ ## Find groups within a table’s row values.
+ ##
+ ## Finds groups within a table’s row values and get group numbers. A group
+ ## is a set of rows that have the same values in all their variable elements.
+ ##
+ ## Returns:
+ ## @var{G} - A double column vector of group numbers created from @var{obj}.
+ ## @var{TID} - A table containing the row values corresponding to the group numbers.
+ ##
+ ## @end deftypefn
+ function [G, TID] = findgroups (this)
+ %FINDGROUPS Find groups within a table's row values and get group numbers
+ %
+ % [G, TID] = findgroups (this)
+ %
+ % Returns:
+ % G - A double column vector of group numbers created from this.
+ % TID - A table containing the row values corresponding to the group numbers.
+ [TID, ~, G] = unique (this);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.evalWithVars
+ ## @deftypefn {Method} {@var{out} =} evalWithVars (@var{obj}, @var{expr})
+ ##
+ ## Evaluate an expression against table’s variables.
+ ##
+ ## Evaluates the M-code expression @var{expr} in a workspace where all of @var{obj}’s
+ ## variables have been assigned to workspace variables.
+ ##
+ ## @var{expr} is a charvec containing an Octave expression.
+ ##
+ ## As an implementation detail, the workspace will also contain some variables
+ ## that are prefixed and suffixed with "__". So try to avoid those in your
+ ## table variable names.
+ ##
+ ## Returns the result of the evaluation.
+ ##
+ ## Examples:
+ ##
+ ## @example
+ ## [s,p,sp] = table_examples.SpDb
+ ## tmp = join (sp, p);
+ ## shipment_weight = evalWithVars (tmp, "Qty .* Weight")
+ ## @end example
+ ##
+ ## @end deftypefn
+ function out = evalWithVars (this, expr)
+ if ~ischar (expr)
+ error ('table.evalWithVars: expr must be char; got a %s', class (expr));
+ endif
+ out = __eval_expr_with_table_vars_in_workspace__ (this, expr);
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.restrict
+ ## @deftypefn {Method} {@var{out} =} restrict (@var{obj}, @var{expr})
+ ## @deftypefnx {Method} {@var{out} =} restrict (@var{obj}, @var{ix})
+ ##
+ ## Subset rows using variable expression or index.
+ ##
+ ## Subsets a table row-wise, using either an index vector or an expression
+ ## involving @var{obj}’s variables.
+ ##
+ ## If the argument is a numeric or logical vector, it is interpreted as an
+ ## index into the rows of this. (Just as with `subsetrows (this, index)`.)
+ ##
+ ## If the argument is a char, then it is evaulated as an M-code expression,
+ ## with all of this’ variables available as workspace variables, as with
+ ## @code{evalWithVars}. The output of expr must be a numeric or logical index
+ ## vector (This form is a shorthand for
+ ## @code{out = subsetrows (this, evalWithVars (this, expr))}.)
+ ##
+ ## TODO: Decide whether to name this to "where" to be more like SQL instead
+ ## of relational algebra.
+ ##
+ ## Examples:
+ ## @example
+ ## [s,p,sp] = table_examples.SpDb;
+ ## prettyprint (restrict (p, 'Weight >= 14 & strcmp(Color, "Red")'))
+ ## @end example
+ ##
+ ## @end deftypefn
+ function out = restrict (this, arg)
+ if ischar (arg)
+ rowIx = evalWithVars (this, arg);
+ out = subsetrows (this, rowIx);
+ elseif isnumeric (arg) || islogical (arg)
+ out = subsetrows (this, arg);
+ endif
+ endfunction
+
+ ## -*- texinfo -*-
+ ## @node table.renamevars
+ ## @deftypefn {Method} {@var{out} =} renamevars (@var{obj}, @var{renameMap})
+ ##
+ ## Rename variables in a table.
+ ##
+ ## Renames selected variables in the table @var{obj} based on the mapping
+ ## provided in @var{renameMap}.
+ ##
+ ## @var{renameMap} is an n-by-2 cellstr array, with the old variable names
+ ## in the first column, and the corresponding new variable names in the
+ ## second column.
+ ##
+ ## Variables which are not included in @var{renameMap} are not modified.
+ ##
+ ## It is an error if any variables named in the first column of @var{renameMap}
+ ## are not present in @var{obj}.
+ ##
+ ## Renames
+ ## @end deftypefn
+ function out = renamevars (this, renameMap)
+ if ! iscellstr (renameMap) || size (renameMap, 2) ~= 2
+ error ('renameMap must be an n-by-2 cellstr; got a %s %s', ...
+ size2str (size (renameMap)), class (renameMap));
+ endif
+ [tf,loc] = ismember (renameMap(:,1), this.VariableNames);
+ if ! all (tf)
+ error ('No such variables in this: %s', strjoin (renameMap(~tf,1), ', '));
+ endif
+ out = this;
+ out.VariableNames(loc) = renameMap(:,2);
+ endfunction
+
+ % Prohibited operations
+
+ function out = shiftdims (this, varargin)
+ %SHIFTDIMS Not supported
+ error ('Function shiftdims is not supported for tables');
+ end
+
+ function out = reshape (this, varargin)
+ %RESHAPE Not supported
+ error ('Function reshape is not supported for tables');
+ end
+
+ function out = resize (this, varargin)
+ %RESIZE Not supported
+ error ('Function resize is not supported for tables');
+ end
+
+ function out = vec (this, varargin)
+ %VEC Not supported
+ error ('Function vec is not supported for tables');
+ end
+ end
+
+ methods (Access = private)
+
+ ## -*- texinfo -*-
+ ## @node table.resolveVarRef
+ ## @deftypefn {Method} {[@var{ixVar}, @var{varNames}] =} resolveVarRef (@var{obj}, @var{varRef})
+ ## @deftypefnx {Method} {[@var{ixVar}, @var{varNames}] =} resolveVarRef (@var{obj}, @var{varRef}, @var{strictness})
+ ##
+ ## Resolve a variable reference against this table.
+ ##
+ ## A @var{varRef} is a numeric or char/cellstr indicator of which variables within
+ ## @var{obj} are being referenced.
+ ##
+ ## @var{strictness} controls what to do when the given variable references
+ ## could not be resolved. It may be 'strict' (the default) or 'lenient'.
+ ##
+ ## Returns:
+ ## @var{ixVar} - the indexes of the variables in @var{obj}
+ ## @var{varNames} - a cellstr of the names of the variables in @var{obj}
+ ##
+ ## Raises an error if any of the specified variables could not be resolved,
+ ## unless strictness is 'lenient', in which case it will return 0 for the
+ ## index and '' for the name for each variable which could not be resolved.
+ ##
+ ## @end deftypefn
+ function [ixVar, varNames] = resolveVarRef (this, varRef, strictness)
+ %RESOLVEVARREF Resolve a reference to variables
+ %
+ % A varRef is a numeric or char/cellstr indicator of which variables within
+ % this table are being referenced.
+ if nargin < 3 || isempty (strictness); strictness = 'strict'; end
+ mustBeMember (strictness, {'strict','lenient'});
+ if isnumeric (varRef) || islogical (varRef)
+ ixVar = varRef;
+ ix_bad = find(ixVar > width (this) | ixVar < 1);
+ if ! isempty (ix_bad)
+ error ('table: variable index out of bounds: %d (table has %d variables)', ...
+ ix_bad(1), width (this));
+ endif
+ elseif isequal (varRef, ':')
+ ixVar = 1:width (this);
+ elseif ischar (varRef) || iscellstr (varRef)
+ varRef = cellstr (varRef);
+ [tf, ixVar] = ismember (varRef, this.VariableNames);
+ if isequal (strictness, 'strict')
+ if ~all (tf)
+ error ('table: No such variable in table: %s', strjoin (varRef(~tf), ', '));
+ endif
+ else
+ ixVar(~tf) = 0;
+ endif
+ elseif isa (varRef, 'octave.table.internal.vartype_filter')
+ ixVar = [];
+ for i = 1:width (this)
+ if varRef.matches (this.VariableValues{i})
+ ixVar(end+1) = i;
+ endif
+ endfor
+ else
+ error ('table: Unsupported variable indexing operand type: %s', class (varRef));
+ end
+ varNames = repmat ({''}, size (ixVar));
+ varNames(ixVar != 0) = this.VariableNames(ixVar(ixVar != 0));
+ end
+
+ function [ixRow, ixVar] = resolveRowVarRefs (this, rowRef, varRef)
+ %RESOLVEROWVARREFS Internal implementation method
+ %
+ % This resolves both row and variable refs to indexes.
+ if isnumeric (rowRef) || islogical (rowRef)
+ ixRow = rowRef;
+ elseif iscellstr (rowRef)
+ if isempty (this.RowNames)
+ error ('table: this table has no RowNames');
+ end
+ [tf, ixRow] = ismember (rowRef, this.RowNames);
+ if ~all (tf)
+ error ('table: No such named row in table: %s', strjoin (rowRef(~tf), ', '));
+ end
+ elseif isequal (rowRef, ':')
+ ixRow = 1:height (this);
+ else
+ error ('table: Unsupported row indexing operand type: %s', class (rowRef));
+ end
+
+ ixVar = resolveVarRef (this, varRef);
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.subsetrows
+ ## @deftypefn {Method} {@var{out} =} subsetrows (@var{obj}, @var{ixRows})
+ ##
+ ## Subset table by rows.
+ ##
+ ## Subsets this table by rows.
+ ##
+ ## @var{ixRows} may be a numeric or logical index into the rows of @var{obj}.
+ ##
+ ## @end deftypefn
+ function out = subsetrows (this, ixRows)
+ out = this;
+ if ~isnumeric (ixRows) && ~islogical (ixRows)
+ % TODO: Hmm. Maybe we ought not to do this check, but just defer to the
+ % individual variable values' indexing logic, so SUBSREF/SUBSINDX overrides
+ % are respected. Would produce worse error messages, but be more "right"
+ % type-wise.
+ error ('table.subsetrows: ixRows must be numeric or logical; got a %s', ...
+ class (ixRows));
+ endif
+ for i = 1:width (this)
+ out.VariableValues{i} = out.VariableValues{i}(ixRows,:);
+ end
+ if ~isempty (this.RowNames)
+ out.RowNames = out.RowNames(ixRows);
+ end
+ end
+
+ ## -*- texinfo -*-
+ ## @node table.subsetvars
+ ## @deftypefn {Method} {@var{out} =} subsetvars (@var{obj}, @var{ixVars})
+ ##
+ ## Subset table by variables.
+ ##
+ ## Subsets table @var{obj} by subsetting it along its variables.
+ ##
+ ## ixVars may be:
+ ## - a numeric index vector
+ ## - a logical index vector
+ ## - ":"
+ ## - a cellstr vector of variable names
+ ##
+ ## The resulting table will have its variables reordered to match ixVars.
+ ##
+ ## @end deftypefn
+ function out = subsetvars (this, ixVars)
+ if ischar (ixVars)
+ if ~isequal (ixVars, ':')
+ ixVars = cellstr (ixVars);
+ endif
+ endif
+ if iscellstr (ixVars)
+ [tf,ix] = ismember (ixVars, this.VariableNames);
+ if ~all (tf)
+ error ('table.subsetvars: no such variables in this table: %s', ...
+ strjoin (ixVars(~tf), ', '));
+ endif
+ ixVars = ix;
+ endif
+ out = this;
+ out.VariableNames = this.VariableNames(ixVars);
+ out.VariableValues = this.VariableValues(ixVars);
+ end
+
+ function [outA, outB] = makeVarNamesUnique (A, B)
+ %MAKEVARNAMESUNIQUE Internal implementation method
+ seenNames = struct;
+ namesA = A.VariableNames;
+ for i = 1:numel (namesA)
+ seenNames.(namesA{i}) = true;
+ endfor
+ namesB = B.VariableNames;
+ newNamesB = cell (size (namesB));
+ for i = 1:numel (namesB)
+ oldName = namesB{i};
+ newName = [];
+ if ! isfield (seenNames, oldName)
+ newName = oldName;
+ else
+ newBaseName = [oldName '_B'];
+ if ! isfield (seenNames, newBaseName)
+ newName = newBaseName;
+ else
+ n = 0;
+ while true
+ n = n + 1;
+ candidate = sprintf('%s_%d', newBaseName, n);
+ if ! isfield (seenNames, candidate)
+ newName = candidate;
+ break
+ endif
+ endwhile
+ endif
+ endif
+ newNamesB{i} = newName;
+ senNames.(newName) = true;
+ endfor
+ outA = A;
+ outB = B;
+ outB.VariableNames = newNamesB;
+ endfunction
+
+ function out = transpose_table (this)
+ %TRANSPOSE_TABLE This is for table's internal use
+ if ~hasrownames (this)
+ error ('table.transpose: this must have RowNames, but it does not');
+ endif
+ tfRowNamesAreVarNames = cellfun(@isvarname, this.RowNames);
+ if ~all (tfRowNamesAreVarNames)
+ error ('table.transpose: Row names must all be valid variable names');
+ endif
+ c = table2cell (this);
+ out = cell2table(c', 'VariableNames', this.RowNames, 'RowNames', this.VariableNames);
+ endfunction
+
+ function mustBeAllSameVars (varargin)
+ %MUSTBEALLSAMEVARS Require that all inputs have the same-named columns
+ args = varargin;
+ if isempty (args)
+ return
+ endif
+ varNames = args{1}.VariableNames;
+ for i = 2:numel (args)
+ if ~isequal (varNames, args{i}.VariableNames)
+ error ('Inconsistent VariableNames.\n Input 1: %s\n Input %d: %s', ...
+ strjoin (varNames, ', '), i, strjoin (args{i}.VariableNames, ', '));
+ endif
+ endfor
+ endfunction
+
+ function mustBeAllColVectorVars (this)
+ %MUSTBEALLCOLVECTORVARS Require that all vars in this are vectors, not matrixes
+ for i = 1:width (this)
+ val = this.VariableValues{i};
+ if size (val, 2) ~= 1
+ error ('All variables in input must be vectors, but variable %d (''%s'') has %d columns', ...
+ i, this.VariableNames{i}, size (val, 2));
+ endif
+ endfor
+ endfunction
+
+ function out = getProperties (this)
+ %GETPROPERTIES Get object's properties as a struct
+ %
+ % This is just for the internal use of subsref/subsasgn for .Properties
+ % access support.
+ out = struct;
+ out.VariableNames = this.VariableNames;
+ out.VariableValues = this.VariableValues;
+ out.RowNames = this.RowNames;
+ out.DimensionNames = this.DimensionNames;
+ endfunction
+ endmethods
+
+ methods
+ function [pkA, pkB] = proxykeysForMatrixes (A, B)
+ %PROXYKEYSFORMATRIXES Compute row proxy keys for tables
+ %
+ % [pkA, pkB] = proxykeysForMatrixes (A, B)
+ %
+ % Note: This is called "proxykeysForMatrixes", not "proxyKeysForTables", because
+ % it overrides the generic proxykeysForMatrixes, and tables *are* matrixes.
+
+ if nargin == 1
+ mustBeA (A, 'table');
+ pkA = proxykeysForOneTable (A);
+ else
+ mustBeA (A, 'table');
+ mustBeA (B, 'table');
+ if width (A) != width (B)
+ error ('table.proxykeysForMatrixes: Tables must be same width; got %d vs %d', ...
+ width (A), width (B));
+ endif
+ [pkA, pkB] = proxykeysForTwoTables (A, B);
+ endif
+ endfunction
+ endmethods
+
+ methods (Access = private)
+
+ function out = proxykeysForOneTable (this)
+ varProxyKeys = cell (size (this.VariableNames));
+ for iVar = 1:numel (this.VariableNames);
+ varProxyKeys{iVar} = proxykeysForMatrixes (this.VariableValues{iVar});
+ endfor
+ out = cat (2, varProxyKeys{:});
+ endfunction
+
+ function [pkA, pkB] = proxykeysForTwoTables (A, B)
+ varProxyKeysA = cell (size (A.VariableNames));
+ varProxyKeysB = cell (size (B.VariableNames));
+ for iVar = 1:numel (A.VariableNames)
+ [varProxyKeysA{iVar}, varProxyKeysB{iVar}] = proxykeysForMatrixes (...
+ A.VariableValues{iVar}, B.VariableValues{iVar});
+ endfor
+ pkA = cat (2, varProxyKeysA{:});
+ pkB = cat (2, varProxyKeysB{:});
+ endfunction
+
+ % Summary stuff
+
+ function summary_impl (this, format)
+ if nargin < 2 || isempty (format); format = 'compact'; endif
+ infos = {};
+ for i_var = 1:width (this)
+ infos{i_var} = summary_for_variable (this, i_var);
+ endfor
+ printf ("%s: %d %s by %d %s\n", class (this), height (this), this.DimensionNames{1}, ...
+ width (this), this.DimensionNames{2});
+ switch format
+ case 'long'
+ for i_var = 1:numel (infos)
+ s = infos{i_var};
+ printf ("%d: %s\n", i_var, s.name);
+ if ! ismember (s.type, {"double", "string"})
+ printf (" %s\n", s.type);
+ endif
+ val_col_width = max (cellfun(@numel, s.info(:,2)));
+ val_col_width = max (val_col_width, 8);
+ for i_info = 1:size (s.info, 1)
+ printf (" %-12s %*s\n", [s.info{i_info,1} ":"], val_col_width, s.info{i_info,2});
+ endfor
+ endfor
+ case 'compact'
+ vars_per_line = 4;
+ n_vars = width (this);
+ for i = 1:vars_per_line:n_vars
+ ix = i:min(n_vars, i+vars_per_line-1);
+ ss = infos(ix);
+ strss = cell (size (ss));
+ for j = 1:numel (strss)
+ s = ss{j};
+ col = {};
+ col{end+1} = sprintf ("%d: %s", i_var, s.name);
+ col{end+1} = sprintf (" %s", s.type);
+ val_col_width = max (cellfun(@numel, s.info(:,2)));
+ val_col_width = max (val_col_width, 8);
+ for i_info = 1:size (s.info, 1)
+ col{end+1} = sprintf (" %-12s %*s", [s.info{i_info,1} ":"], ...
+ val_col_width, s.info{i_info,2});
+ endfor
+ strss{j} = col(:);
+ endfor
+ lines = glue_row_strs (strss, 3);
+ for j = 1:numel (lines)
+ printf ("%s\n", lines{j});
+ endfor
+ endfor
+ otherwise
+ error ('table.summary: invalid format: ''%s''', format);
+ endswitch
+ endfunction
+
+ function out = summary_for_variable (this, ix)
+ out.name = this.VariableNames{ix};
+ x = this.VariableValues{ix};
+ out.type = class (x);
+ if size (x, 2) > 1
+ out.info = {
+ 'Columns' size(x, 2)
+ };
+ elseif isnumeric (x)
+ out.info = summary_for_var_numeric (x);
+ elseif iscategorical (x)
+ out.info = summary_for_var_categorical (x);
+ elseif isstring (x) || iscellstr (x)
+ out.info = summary_for_var_string (x);
+ else
+ out.info = cell (0, 2);
+ endif
+ endfunction
+ endmethods
+
+endclassdef
+
+function out = tablevar_dispstrs (x)
+ if isa (x, "string")
+ out = cellstr (x);
+ out(ismissing (x)) = {""};
+ else
+ out = dispstrs (x);
+ endif
+endfunction
+
+function out = glue_row_strs (strss, n_pad_chars)
+ pad = repmat (' ', [1 n_pad_chars]);
+ n_cols = numel (strss);
+ n_rows = max (cellfun (@numel, strss));
+ widths = NaN (1, n_cols);
+ for i = 1:numel(strss)
+ strss{i}(end+1:n_rows) = {''};
+ widths(i) = max (cellfun (@numel, strss{i}));
+ for j = 1:numel(strss{i})
+ if numel(strss{i}{j}) < widths(i)
+ strss{i}{j}(end+1:widths(i)) = ' ';
+ endif
+ endfor
+ endfor
+ grid = cat (2, strss{:});
+ for i_line = 1:n_rows
+ lines{i_line} = strjoin(grid(i_line,:), {pad});
+ endfor
+ out = lines;
+endfunction
+
+function out = summary_for_var_numeric (x)
+ x_min = min (x);
+ x_mean = mean (x);
+ x_max = max (x);
+ x_prcts = prctile (x, [25 50 75]);
+ out = {
+ 'Min.' num2str(x_min)
+ '1st Qu.' num2str(x_prcts(1))
+ 'Median' num2str(x_prcts(2))
+ 'Mean' num2str(x_mean)
+ '3rd Qu.' num2str(x_prcts(3))
+ 'Max.' num2str(x_max)
+ };
+endfunction
+
+function out = summary_for_var_categorical (x)
+ max_ctgs_to_list = 7;
+ u_ctgs = unique (x (!ismissing (x)));
+ n_ctgs = numel (u_ctgs);
+ n_missing = numel (find (ismissing (x)));
+ u_ctg_strs = dispstrs (u_ctgs);
+ if n_ctgs <= max_ctgs_to_list
+ out = {};
+ for i = 1:numel (u_ctgs)
+ out = [out; {
+ u_ctg_strs{i} num2str(numel(find(x == u_ctgs(i))))
+ }];
+ endfor
+ out = [out; {
+ '' num2str(n_missing)
+ }];
+ else
+ out = {
+ 'N. Ctgs.' num2str(n_ctgs)
+ 'N. Miss.' num2str(n_missing)
+ };
+ endif
+endfunction
+
+function out = summary_for_var_string (x)
+ x = string (x);
+ n_missing = numel (find (ismissing (x)));
+ u_strs = unique (x (!ismissing (x)));
+ # TODO: This redundancy calculation looks wrong. Figure out a real one.
+ #redundancy = (numel(x) - n_missing) / numel(u_strs);
+ out = {
+ "Card." num2str(numel(u_strs))
+ "N. Miss." num2str(n_missing)
+ #"Redund." num2str(redundancy)
+ };
+endfunction