382 lines
14 KiB
C++
382 lines
14 KiB
C++
|
/*
|
||
|
* Copyright (C) 2011 The Android Open Source Project
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* A service that exchanges time synchronization information between
|
||
|
* a master that defines a timeline and clients that follow the timeline.
|
||
|
*/
|
||
|
|
||
|
#define __STDC_LIMIT_MACROS
|
||
|
#define LOG_TAG "common_time"
|
||
|
#include <utils/Log.h>
|
||
|
#include <stdint.h>
|
||
|
|
||
|
#include <common_time/local_clock.h>
|
||
|
#include <assert.h>
|
||
|
|
||
|
#include "clock_recovery.h"
|
||
|
#include "common_clock.h"
|
||
|
#ifdef TIME_SERVICE_DEBUG
|
||
|
#include "diag_thread.h"
|
||
|
#endif
|
||
|
|
||
|
namespace android {
|
||
|
|
||
|
// Builds a clock recovery loop around the supplied clocks.
//
// local_clock  - clock whose rate is disciplined; must not be NULL.
// common_clock - common timeline being recovered; must not be NULL.
//
// The pointers are stored as-is. The constructor probes whether the local
// clock can be slewed directly, computes the PID parameters, and resets both
// the position and frequency state of the loop.
ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
                                     CommonClock* common_clock) {
    assert(NULL != local_clock);
    assert(NULL != common_clock);

    local_clock_  = local_clock;
    common_clock_ = common_clock;

    // The local clock is only considered slewable if it passes its init check
    // and also accepts a slew request of zero.
    local_clock_can_slew_ = false;
    if (local_clock_->initCheck()) {
        local_clock_can_slew_ = (OK == local_clock_->setLocalSlew(0));
    }

    computePIDParams();
    reset(true, true);

#ifdef TIME_SERVICE_DEBUG
    // Diagnostics are best effort: failures are logged but never fatal.
    diag_thread_ = new DiagThread(common_clock_, local_clock_);
    const bool have_diag_thread = (diag_thread_ != NULL);
    if (!have_diag_thread) {
        ALOGW("Failed to allocate diagnostic thread.");
    } else if (diag_thread_->startWorkThread() != OK) {
        ALOGW("Failed to start A@H clock recovery diagnostic thread.");
    }
#endif
}
|
||
|
|
||
|
// Tears down the recovery loop. In TIME_SERVICE_DEBUG builds this stops the
// diagnostic work thread, if one was created.
ClockRecoveryLoop::~ClockRecoveryLoop() {
#ifdef TIME_SERVICE_DEBUG
    // The constructor tolerates a NULL diag_thread_ (allocation failure), so
    // the destructor must not dereference it unconditionally.
    if (diag_thread_ != NULL) {
        diag_thread_->stopWorkThread();
    }
#endif
}
|
||
|
|
||
|
// Public, locking entry point for resetting the loop.
//
// position  - forwarded to reset_l(); when true the common time basis and the
//             startup filter are reset.
// frequency - forwarded to reset_l(); when true the controller state and the
//             current correction are cleared.
void ClockRecoveryLoop::reset(bool position, bool frequency) {
    // Hold lock_ for the duration of the reset; reset_l() expects it held.
    Mutex::Autolock guard(&lock_);

    reset_l(position, frequency);
}
|
||
|
|
||
|
// Returns the index of the entry in data[0..count) with the smallest rtt.
//
// When several entries share the smallest rtt, the lowest index wins. When
// count is 0 or 1 the result is 0 and no element is read.
uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
                                          uint32_t count) {
    uint32_t best_ndx = 0;
    uint32_t ndx = 1;

    while (ndx < count) {
        // Strict comparison keeps the earliest entry on ties.
        if (data[ndx].rtt < data[best_ndx].rtt) {
            best_ndx = ndx;
        }
        ++ndx;
    }

    return best_ndx;
}
|
||
|
|
||
|
bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
|
||
|
int64_t nominal_common_time,
|
||
|
int64_t rtt) {
|
||
|
Mutex::Autolock lock(&lock_);
|
||
|
|
||
|
// If we have not defined a basis for common time, then we need to use these
|
||
|
// initial points to do so. In order to avoid significant initial error
|
||
|
// from a particularly bad startup data point, we collect the first N data
|
||
|
// points and choose the best of them before moving on.
|
||
|
if (!common_clock_->isValid()) {
|
||
|
if (startup_filter_wr_ < kStartupFilterSize) {
|
||
|
DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_];
|
||
|
d.local_time = local_time;
|
||
|
d.nominal_common_time = nominal_common_time;
|
||
|
d.rtt = rtt;
|
||
|
startup_filter_wr_++;
|
||
|
}
|
||
|
|
||
|
if (startup_filter_wr_ == kStartupFilterSize) {
|
||
|
uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
|
||
|
kStartupFilterSize);
|
||
|
|
||
|
common_clock_->setBasis(
|
||
|
startup_filter_data_[min_rtt].local_time,
|
||
|
startup_filter_data_[min_rtt].nominal_common_time);
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
int64_t observed_common;
|
||
|
int64_t delta;
|
||
|
int32_t delta32;
|
||
|
int32_t correction_cur;
|
||
|
int32_t correction_cur_P = 0;
|
||
|
int32_t correction_cur_I = 0;
|
||
|
int32_t correction_cur_D = 0;
|
||
|
|
||
|
if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
|
||
|
// Since we just checked to make certain that this conversion was valid,
|
||
|
// and no one else in the system should be messing with it, if this
|
||
|
// conversion is suddenly invalid, it is a good reason to panic.
|
||
|
ALOGE("Failed to convert local time to common time in %s:%d",
|
||
|
__PRETTY_FUNCTION__, __LINE__);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Implement a filter which should match NTP filtering behavior when a
|
||
|
// client is associated with only one peer of lower stratum. Basically,
|
||
|
// always use the best of the N last data points, where best is defined as
|
||
|
// lowest round trip time. NTP uses an N of 8; we use a value of 6.
|
||
|
//
|
||
|
// TODO(johngro) : experiment with other filter strategies. The goal here
|
||
|
// is to mitigate the effects of high RTT data points which typically have
|
||
|
// large asymmetries in the TX/RX legs. Downside of the existing NTP
|
||
|
// approach (particularly because of the PID controller we are using to
|
||
|
// produce the control signal from the filtered data) are that the rate at
|
||
|
// which discipline events are actually acted upon becomes irregular and can
|
||
|
// become drawn out (the time between actionable event can go way up). If
|
||
|
// the system receives a strong high quality data point, the proportional
|
||
|
// component of the controller can produce a strong correction which is left
|
||
|
// in place for too long causing overshoot. In addition, the integral
|
||
|
// component of the system currently is an approximation based on the
|
||
|
// assumption of a more or less homogeneous sampling of the error. Its
|
||
|
// unclear what the effect of undermining this assumption would be right
|
||
|
// now.
|
||
|
|
||
|
// Two ideas which come to mind immediately would be to...
|
||
|
// 1) Keep a history of more data points (32 or so) and ignore data points
|
||
|
// whose RTT is more than a certain number of standard deviations outside
|
||
|
// of the norm.
|
||
|
// 2) Eliminate the PID controller portion of this system entirely.
|
||
|
// Instead, move to a system which uses a very wide filter (128 data
|
||
|
// points or more) with a sum-of-least-squares line fitting approach to
|
||
|
// tracking the long term drift. This would take the place of the I
|
||
|
// component in the current PID controller. Also use a much more narrow
|
||
|
// outlier-rejector filter (as described in #1) to drive a short term
|
||
|
// correction factor similar to the P component of the PID controller.
|
||
|
assert(filter_wr_ < kFilterSize);
|
||
|
filter_data_[filter_wr_].local_time = local_time;
|
||
|
filter_data_[filter_wr_].observed_common_time = observed_common;
|
||
|
filter_data_[filter_wr_].nominal_common_time = nominal_common_time;
|
||
|
filter_data_[filter_wr_].rtt = rtt;
|
||
|
filter_data_[filter_wr_].point_used = false;
|
||
|
filter_wr_ = (filter_wr_ + 1) % kFilterSize;
|
||
|
if (!filter_wr_)
|
||
|
filter_full_ = true;
|
||
|
|
||
|
// Scan the accumulated data for the point with the minimum RTT. If that
|
||
|
// point has never been used before, go ahead and use it now, otherwise just
|
||
|
// do nothing.
|
||
|
uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
|
||
|
uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
|
||
|
if (filter_data_[min_rtt].point_used)
|
||
|
return true;
|
||
|
|
||
|
local_time = filter_data_[min_rtt].local_time;
|
||
|
observed_common = filter_data_[min_rtt].observed_common_time;
|
||
|
nominal_common_time = filter_data_[min_rtt].nominal_common_time;
|
||
|
filter_data_[min_rtt].point_used = true;
|
||
|
|
||
|
// Compute the error then clamp to the panic threshold. If we ever exceed
|
||
|
// this amt of error, its time to panic and reset the system. Given that
|
||
|
// the error in the measurement of the error could be as high as the RTT of
|
||
|
// the data point, we don't actually panic until the implied error (delta)
|
||
|
// is greater than the absolute panic threashold plus the RTT. IOW - we
|
||
|
// don't panic until we are absoluely sure that our best case sync is worse
|
||
|
// than the absolute panic threshold.
|
||
|
int64_t effective_panic_thresh = panic_thresh_ + filter_data_[min_rtt].rtt;
|
||
|
delta = nominal_common_time - observed_common;
|
||
|
if ((delta > effective_panic_thresh) || (delta < -effective_panic_thresh)) {
|
||
|
// PANIC!!!
|
||
|
//
|
||
|
// TODO(johngro) : need to report this to the upper levels of
|
||
|
// code.
|
||
|
reset_l(false, true);
|
||
|
return false;
|
||
|
} else
|
||
|
delta32 = delta;
|
||
|
|
||
|
// Accumulate error into the integrated error, then clamp.
|
||
|
integrated_error_ += delta32;
|
||
|
if (integrated_error_ > pid_params_.integrated_delta_max)
|
||
|
integrated_error_ = pid_params_.integrated_delta_max;
|
||
|
else if (integrated_error_ < pid_params_.integrated_delta_min)
|
||
|
integrated_error_ = pid_params_.integrated_delta_min;
|
||
|
|
||
|
// Compute the difference in error between last time and this time, then
|
||
|
// update last_delta_
|
||
|
int32_t input_D = last_delta_valid_ ? delta32 - last_delta_ : 0;
|
||
|
last_delta_valid_ = true;
|
||
|
last_delta_ = delta32;
|
||
|
|
||
|
// Compute the various components of the correction value.
|
||
|
correction_cur_P = doGainScale(pid_params_.gain_P, delta32);
|
||
|
correction_cur_I = doGainScale(pid_params_.gain_I, integrated_error_);
|
||
|
|
||
|
// TODO(johngro) : the differential portion of this code used to rely
|
||
|
// upon a completely homogeneous discipline frequency. Now that the
|
||
|
// discipline frequency may not be homogeneous, its probably important
|
||
|
// to divide by the amt of time between discipline events during the
|
||
|
// gain calculation.
|
||
|
correction_cur_D = doGainScale(pid_params_.gain_D, input_D);
|
||
|
|
||
|
// Compute the final correction value and clamp.
|
||
|
correction_cur = correction_cur_P + correction_cur_I + correction_cur_D;
|
||
|
if (correction_cur < pid_params_.correction_min)
|
||
|
correction_cur = pid_params_.correction_min;
|
||
|
else if (correction_cur > pid_params_.correction_max)
|
||
|
correction_cur = pid_params_.correction_max;
|
||
|
|
||
|
// If there was a change in the amt of correction to use, update the
|
||
|
// system.
|
||
|
if (correction_cur_ != correction_cur) {
|
||
|
correction_cur_ = correction_cur;
|
||
|
applySlew();
|
||
|
}
|
||
|
|
||
|
ALOGV("rtt %lld observed %lld nominal %lld delta = %5lld "
|
||
|
"int = %7d correction %5d (P %5d, I %5d, D %5d)\n",
|
||
|
filter_data_[min_rtt].rtt,
|
||
|
observed_common,
|
||
|
nominal_common_time,
|
||
|
nominal_common_time - observed_common,
|
||
|
integrated_error_,
|
||
|
correction_cur,
|
||
|
correction_cur_P,
|
||
|
correction_cur_I,
|
||
|
correction_cur_D);
|
||
|
|
||
|
#ifdef TIME_SERVICE_DEBUG
|
||
|
diag_thread_->pushDisciplineEvent(
|
||
|
local_time,
|
||
|
observed_common,
|
||
|
nominal_common_time,
|
||
|
correction_cur,
|
||
|
correction_cur_P,
|
||
|
correction_cur_I,
|
||
|
correction_cur_D);
|
||
|
#endif
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
int32_t ClockRecoveryLoop::getLastErrorEstimate() {
|
||
|
Mutex::Autolock lock(&lock_);
|
||
|
|
||
|
if (last_delta_valid_)
|
||
|
return last_delta_;
|
||
|
else
|
||
|
return ICommonClock::kErrorEstimateUnknown;
|
||
|
}
|
||
|
|
||
|
void ClockRecoveryLoop::computePIDParams() {
|
||
|
// TODO(johngro) : add the ability to fetch parameters from the driver/board
|
||
|
// level in case they have a HW clock discipline solution with parameters
|
||
|
// tuned specifically for it.
|
||
|
|
||
|
// Correction factor is limited to MIN/MAX_INT_16
|
||
|
pid_params_.correction_min = -0x8000;
|
||
|
pid_params_.correction_max = 0x7FFF;
|
||
|
|
||
|
// Default proportional gain to 2^15:1000. (max proportional drive at 1mSec
|
||
|
// of instantaneous error)
|
||
|
memset(&pid_params_.gain_P, 0, sizeof(pid_params_.gain_P));
|
||
|
pid_params_.gain_P.a_to_b_numer = 0x8000;
|
||
|
pid_params_.gain_P.a_to_b_denom = 1000;
|
||
|
|
||
|
// Set the integral gain to 2^15:5000
|
||
|
memset(&pid_params_.gain_I, 0, sizeof(pid_params_.gain_I));
|
||
|
pid_params_.gain_I.a_to_b_numer = 0x8000;
|
||
|
pid_params_.gain_I.a_to_b_denom = 5000;
|
||
|
|
||
|
// Default controller is just a PI controller. Right now, the network based
|
||
|
// measurements of the error are way to noisy to feed into the differential
|
||
|
// component of a PID controller. Someday we might come back and add some
|
||
|
// filtering of the error channel, but until then leave the controller as a
|
||
|
// simple PI controller.
|
||
|
memset(&pid_params_.gain_D, 0, sizeof(pid_params_.gain_D));
|
||
|
|
||
|
// Don't let the integral component of the controller wind up to
|
||
|
// the point where it would want to drive the correction factor
|
||
|
// past saturation.
|
||
|
int64_t tmp;
|
||
|
pid_params_.gain_I.doReverseTransform(pid_params_.correction_min, &tmp);
|
||
|
pid_params_.integrated_delta_min = static_cast<int32_t>(tmp);
|
||
|
pid_params_.gain_I.doReverseTransform(pid_params_.correction_max, &tmp);
|
||
|
pid_params_.integrated_delta_max = static_cast<int32_t>(tmp);
|
||
|
|
||
|
// By default, panic when are certain that the sync error is > 20mSec;
|
||
|
panic_thresh_ = 20000;
|
||
|
}
|
||
|
|
||
|
void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
|
||
|
assert(NULL != common_clock_);
|
||
|
|
||
|
if (position) {
|
||
|
common_clock_->resetBasis();
|
||
|
startup_filter_wr_ = 0;
|
||
|
}
|
||
|
|
||
|
if (frequency) {
|
||
|
last_delta_valid_ = false;
|
||
|
last_delta_ = 0;
|
||
|
integrated_error_ = 0;
|
||
|
correction_cur_ = 0;
|
||
|
applySlew();
|
||
|
}
|
||
|
|
||
|
filter_wr_ = 0;
|
||
|
filter_full_ = false;
|
||
|
}
|
||
|
|
||
|
int32_t ClockRecoveryLoop::doGainScale(const LinearTransform& gain,
|
||
|
int32_t val) {
|
||
|
if (!gain.a_to_b_numer || !gain.a_to_b_denom || !val)
|
||
|
return 0;
|
||
|
|
||
|
int64_t tmp;
|
||
|
int64_t val64 = static_cast<int64_t>(val);
|
||
|
if (!gain.doForwardTransform(val64, &tmp)) {
|
||
|
ALOGW("Overflow/Underflow while scaling %d in %s",
|
||
|
val, __PRETTY_FUNCTION__);
|
||
|
return (val < 0) ? INT32_MIN : INT32_MAX;
|
||
|
}
|
||
|
|
||
|
if (tmp > INT32_MAX) {
|
||
|
ALOGW("Overflow while scaling %d in %s", val, __PRETTY_FUNCTION__);
|
||
|
return INT32_MAX;
|
||
|
}
|
||
|
|
||
|
if (tmp < INT32_MIN) {
|
||
|
ALOGW("Underflow while scaling %d in %s", val, __PRETTY_FUNCTION__);
|
||
|
return INT32_MIN;
|
||
|
}
|
||
|
|
||
|
return static_cast<int32_t>(tmp);
|
||
|
}
|
||
|
|
||
|
void ClockRecoveryLoop::applySlew() {
|
||
|
if (local_clock_can_slew_) {
|
||
|
local_clock_->setLocalSlew(correction_cur_);
|
||
|
} else {
|
||
|
// The SW clock recovery implemented by the common clock class expects
|
||
|
// values expressed in PPM. Map the MIN/MAX_INT_16 drive range to +/-
|
||
|
// 100ppm.
|
||
|
int sw_correction;
|
||
|
sw_correction = correction_cur_ - pid_params_.correction_min;
|
||
|
sw_correction *= 200;
|
||
|
sw_correction /= (pid_params_.correction_max -
|
||
|
pid_params_.correction_min);
|
||
|
sw_correction -= 100;
|
||
|
|
||
|
common_clock_->setSlew(local_clock_->getLocalTime(), sw_correction);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} // namespace android
|