// Source-viewer metadata (not part of the program): 382 lines, 14 KiB, C++.

/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* A service that exchanges time synchronization information between
* a master that defines a timeline and clients that follow the timeline.
*/
#define __STDC_LIMIT_MACROS
#define LOG_TAG "common_time"
#include <utils/Log.h>
#include <stdint.h>
#include <common_time/local_clock.h>
#include <assert.h>
#include "clock_recovery.h"
#include "common_clock.h"
#ifdef TIME_SERVICE_DEBUG
#include "diag_thread.h"
#endif
namespace android {
// Constructs a recovery loop operating on the supplied local and common
// clocks.  Both pointers are asserted to be non-NULL and are stored for later
// use.  Construction also:
//   - probes whether the local clock can be slewed directly (initCheck()
//     succeeds and a slew of 0 is accepted),
//   - computes the default PID parameters, and
//   - performs a full (position and frequency) reset.
// In TIME_SERVICE_DEBUG builds a diagnostic thread is created and started as
// well; failure to do either is logged and otherwise ignored.
ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
                                     CommonClock* common_clock) {
    assert(NULL != local_clock);
    assert(NULL != common_clock);

    local_clock_  = local_clock;
    common_clock_ = common_clock;

    // Only attempt the trial slew when the local clock reports that it
    // initialized properly.
    bool can_slew = false;
    if (local_clock_->initCheck())
        can_slew = (local_clock_->setLocalSlew(0) == OK);
    local_clock_can_slew_ = can_slew;

    computePIDParams();
    reset(true, true);

#ifdef TIME_SERVICE_DEBUG
    diag_thread_ = new DiagThread(common_clock_, local_clock_);
    if (diag_thread_ == NULL) {
        ALOGW("Failed to allocate diagnostic thread.");
    } else if (diag_thread_->startWorkThread() != OK) {
        ALOGW("Failed to start A@H clock recovery diagnostic thread.");
    }
#endif
}
// Destroys the recovery loop.  In TIME_SERVICE_DEBUG builds this stops the
// diagnostic worker thread if one exists.
ClockRecoveryLoop::~ClockRecoveryLoop() {
#ifdef TIME_SERVICE_DEBUG
    // The constructor only logs a warning when the diagnostic thread could
    // not be allocated, so diag_thread_ may legitimately be NULL here.
    if (diag_thread_ != NULL)
        diag_thread_->stopWorkThread();
#endif
}
// Locked wrapper around reset_l(): holds lock_ for the duration of the call
// and forwards both flags unchanged.
//   position  - when true, the common clock basis and startup filter are reset
//   frequency - when true, the accumulated error/correction state is reset
void ClockRecoveryLoop::reset(bool position, bool frequency) {
    Mutex::Autolock guard(&lock_);
    reset_l(position, frequency);
}
// Returns the index of the entry with the smallest rtt among the first
// |count| elements of |data|.  When several entries tie for the minimum, the
// lowest index wins.  Index 0 is returned when |count| is 0 or 1 (no element
// is read in that case).
uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
                                          uint32_t count) {
    uint32_t best = 0;

    for (uint32_t ndx = 1; ndx < count; ++ndx) {
        // Strict comparison keeps the earliest of equally good points.
        if (data[ndx].rtt < data[best].rtt) {
            best = ndx;
        }
    }

    return best;
}
// Narrows a 64-bit value to int32_t, saturating at INT32_MIN/INT32_MAX
// instead of silently truncating.
static inline int32_t saturateToInt32(int64_t v) {
    if (v > INT32_MAX)
        return INT32_MAX;
    if (v < INT32_MIN)
        return INT32_MIN;
    return static_cast<int32_t>(v);
}

// Feeds one synchronization measurement into the recovery loop.
//
//   local_time          - local clock timestamp of the measurement
//   nominal_common_time - common time the measurement associates with
//                         local_time
//   rtt                 - round trip time of the measurement; lower values
//                         are treated as higher quality
//
// While the common clock has no valid basis, the measurement is only
// collected into the startup filter (and the basis is set once that filter is
// full).  Afterwards the measurement is placed into the running filter and,
// if the lowest-RTT point in that filter has not been used yet, it drives one
// step of the PID controller and possibly a new slew correction.
//
// Returns true when the measurement was consumed (even if it produced no
// change), and false when the local->common conversion failed or when the
// observed error exceeded the panic threshold (in which case the frequency
// state has been reset).  lock_ is held for the duration of the call.
bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
                                            int64_t nominal_common_time,
                                            int64_t rtt) {
    Mutex::Autolock lock(&lock_);

    // If we have not defined a basis for common time, then we need to use these
    // initial points to do so.  In order to avoid significant initial error
    // from a particularly bad startup data point, we collect the first N data
    // points and choose the best of them before moving on.
    if (!common_clock_->isValid()) {
        if (startup_filter_wr_ < kStartupFilterSize) {
            DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_];
            d.local_time = local_time;
            d.nominal_common_time = nominal_common_time;
            d.rtt = rtt;
            startup_filter_wr_++;
        }

        if (startup_filter_wr_ == kStartupFilterSize) {
            uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
                                             kStartupFilterSize);

            common_clock_->setBasis(
                    startup_filter_data_[min_rtt].local_time,
                    startup_filter_data_[min_rtt].nominal_common_time);
        }

        return true;
    }

    int64_t observed_common;
    int64_t delta;
    int32_t delta32;
    int32_t correction_cur;
    int32_t correction_cur_P = 0;
    int32_t correction_cur_I = 0;
    int32_t correction_cur_D = 0;

    if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
        // Since we just checked to make certain that this conversion was valid,
        // and no one else in the system should be messing with it, if this
        // conversion is suddenly invalid, it is a good reason to panic.
        ALOGE("Failed to convert local time to common time in %s:%d",
                __PRETTY_FUNCTION__, __LINE__);
        return false;
    }

    // Implement a filter which should match NTP filtering behavior when a
    // client is associated with only one peer of lower stratum.  Basically,
    // always use the best of the N last data points, where best is defined as
    // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
    //
    // TODO(johngro) : experiment with other filter strategies.  The goal here
    // is to mitigate the effects of high RTT data points which typically have
    // large asymmetries in the TX/RX legs.  Downside of the existing NTP
    // approach (particularly because of the PID controller we are using to
    // produce the control signal from the filtered data) are that the rate at
    // which discipline events are actually acted upon becomes irregular and can
    // become drawn out (the time between actionable event can go way up).  If
    // the system receives a strong high quality data point, the proportional
    // component of the controller can produce a strong correction which is left
    // in place for too long causing overshoot.  In addition, the integral
    // component of the system currently is an approximation based on the
    // assumption of a more or less homogeneous sampling of the error.  It's
    // unclear what the effect of undermining this assumption would be right
    // now.
    //
    // Two ideas which come to mind immediately would be to...
    // 1) Keep a history of more data points (32 or so) and ignore data points
    //    whose RTT is more than a certain number of standard deviations outside
    //    of the norm.
    // 2) Eliminate the PID controller portion of this system entirely.
    //    Instead, move to a system which uses a very wide filter (128 data
    //    points or more) with a sum-of-least-squares line fitting approach to
    //    tracking the long term drift.  This would take the place of the I
    //    component in the current PID controller.  Also use a much more narrow
    //    outlier-rejector filter (as described in #1) to drive a short term
    //    correction factor similar to the P component of the PID controller.
    assert(filter_wr_ < kFilterSize);
    filter_data_[filter_wr_].local_time           = local_time;
    filter_data_[filter_wr_].observed_common_time = observed_common;
    filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
    filter_data_[filter_wr_].rtt                  = rtt;
    filter_data_[filter_wr_].point_used           = false;
    filter_wr_ = (filter_wr_ + 1) % kFilterSize;
    if (!filter_wr_)
        filter_full_ = true;

    // Scan the accumulated data for the point with the minimum RTT.  If that
    // point has never been used before, go ahead and use it now, otherwise just
    // do nothing.
    uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
    uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
    if (filter_data_[min_rtt].point_used)
        return true;

    local_time          = filter_data_[min_rtt].local_time;
    observed_common     = filter_data_[min_rtt].observed_common_time;
    nominal_common_time = filter_data_[min_rtt].nominal_common_time;
    filter_data_[min_rtt].point_used = true;

    // Compute the error then clamp to the panic threshold.  If we ever exceed
    // this amt of error, it's time to panic and reset the system.  Given that
    // the error in the measurement of the error could be as high as the RTT of
    // the data point, we don't actually panic until the implied error (delta)
    // is greater than the absolute panic threshold plus the RTT.  IOW - we
    // don't panic until we are absolutely sure that our best case sync is worse
    // than the absolute panic threshold.
    int64_t effective_panic_thresh = panic_thresh_ + filter_data_[min_rtt].rtt;
    delta = nominal_common_time - observed_common;
    if ((delta > effective_panic_thresh) || (delta < -effective_panic_thresh)) {
        // PANIC!!!
        //
        // TODO(johngro) : need to report this to the upper levels of
        // code.
        reset_l(false, true);
        return false;
    }

    // The panic window grows with the RTT of the data point, so a very large
    // RTT can admit an error which does not fit in 32 bits.  Saturate rather
    // than silently truncate.
    delta32 = saturateToInt32(delta);

    // Accumulate error into the integrated error, then clamp.  The sum is
    // formed in 64 bits so that it cannot overflow before being clamped.
    int64_t integrated = static_cast<int64_t>(integrated_error_) + delta32;
    if (integrated > pid_params_.integrated_delta_max)
        integrated = pid_params_.integrated_delta_max;
    else if (integrated < pid_params_.integrated_delta_min)
        integrated = pid_params_.integrated_delta_min;
    integrated_error_ = saturateToInt32(integrated);

    // Compute the difference in error between last time and this time, then
    // update last_delta_
    int32_t input_D = last_delta_valid_
                    ? saturateToInt32(static_cast<int64_t>(delta32) -
                                      last_delta_)
                    : 0;
    last_delta_valid_ = true;
    last_delta_ = delta32;

    // Compute the various components of the correction value.
    correction_cur_P = doGainScale(pid_params_.gain_P, delta32);
    correction_cur_I = doGainScale(pid_params_.gain_I, integrated_error_);

    // TODO(johngro) : the differential portion of this code used to rely
    // upon a completely homogeneous discipline frequency.  Now that the
    // discipline frequency may not be homogeneous, it's probably important
    // to divide by the amt of time between discipline events during the
    // gain calculation.
    correction_cur_D = doGainScale(pid_params_.gain_D, input_D);

    // Compute the final correction value and clamp.  Each component may
    // already be saturated at the int32 limits by doGainScale, so the sum is
    // formed in 64 bits to avoid signed overflow before clamping.
    int64_t correction_sum = static_cast<int64_t>(correction_cur_P) +
                             correction_cur_I +
                             correction_cur_D;
    if (correction_sum < pid_params_.correction_min)
        correction_sum = pid_params_.correction_min;
    else if (correction_sum > pid_params_.correction_max)
        correction_sum = pid_params_.correction_max;
    correction_cur = saturateToInt32(correction_sum);

    // If there was a change in the amt of correction to use, update the
    // system.
    if (correction_cur_ != correction_cur) {
        correction_cur_ = correction_cur;
        applySlew();
    }

    // int64_t is not "long long" on every ABI; cast explicitly so that the
    // arguments always match the %lld conversions.
    ALOGV("rtt %lld observed %lld nominal %lld delta = %5lld "
          "int = %7d correction %5d (P %5d, I %5d, D %5d)\n",
          static_cast<long long>(filter_data_[min_rtt].rtt),
          static_cast<long long>(observed_common),
          static_cast<long long>(nominal_common_time),
          static_cast<long long>(delta),
          integrated_error_,
          correction_cur,
          correction_cur_P,
          correction_cur_I,
          correction_cur_D);

#ifdef TIME_SERVICE_DEBUG
    // The constructor only warns when the diagnostic thread could not be
    // allocated, so it may be NULL here.
    if (diag_thread_ != NULL) {
        diag_thread_->pushDisciplineEvent(
                local_time,
                observed_common,
                nominal_common_time,
                correction_cur,
                correction_cur_P,
                correction_cur_I,
                correction_cur_D);
    }
#endif

    return true;
}
// Returns the most recent error value recorded by pushDisciplineEvent(), or
// ICommonClock::kErrorEstimateUnknown when no valid value has been recorded
// since the last frequency reset.  Takes lock_ while reading.
int32_t ClockRecoveryLoop::getLastErrorEstimate() {
    Mutex::Autolock guard(&lock_);

    if (!last_delta_valid_)
        return ICommonClock::kErrorEstimateUnknown;

    return last_delta_;
}
// Populates pid_params_ and panic_thresh_ with the built-in defaults:
// correction limits, P/I/D gains, and the clamp range for the integrated
// error (derived from the integral gain and the correction limits).
void ClockRecoveryLoop::computePIDParams() {
    // TODO(johngro) : add the ability to fetch parameters from the driver/board
    // level in case they have a HW clock discipline solution with parameters
    // tuned specifically for it.

    // By default, panic when we are certain that the sync error is > 20mSec.
    panic_thresh_ = 20000;

    // The correction factor is limited to the signed 16 bit range.
    pid_params_.correction_min = -0x8000;
    pid_params_.correction_max = 0x7FFF;

    // Proportional gain defaults to 2^15:1000 (max proportional drive at
    // 1mSec of instantaneous error).
    memset(&pid_params_.gain_P, 0, sizeof(pid_params_.gain_P));
    pid_params_.gain_P.a_to_b_numer = 0x8000;
    pid_params_.gain_P.a_to_b_denom = 1000;

    // Integral gain defaults to 2^15:5000.
    memset(&pid_params_.gain_I, 0, sizeof(pid_params_.gain_I));
    pid_params_.gain_I.a_to_b_numer = 0x8000;
    pid_params_.gain_I.a_to_b_denom = 5000;

    // The differential gain is left zeroed, making the default controller a
    // plain PI controller.  Right now, the network based measurements of the
    // error are way too noisy to feed into the differential component of a
    // PID controller.  Someday we might come back and add some filtering of
    // the error channel, but until then leave the controller as a simple PI
    // controller.
    memset(&pid_params_.gain_D, 0, sizeof(pid_params_.gain_D));

    // Bound the integrated error so that the integral component alone can
    // never ask for a correction beyond saturation: run each correction limit
    // backwards through the integral gain to find the matching error limit.
    int64_t reversed;

    pid_params_.gain_I.doReverseTransform(pid_params_.correction_min,
                                          &reversed);
    pid_params_.integrated_delta_min = static_cast<int32_t>(reversed);

    pid_params_.gain_I.doReverseTransform(pid_params_.correction_max,
                                          &reversed);
    pid_params_.integrated_delta_max = static_cast<int32_t>(reversed);
}
// Unlocked implementation of reset(); reset() calls this while holding lock_.
//
//   position  - when true, drop the common clock basis and restart the
//               startup filter
//   frequency - when true, clear the error history and integrated error, zero
//               the current correction and push that zero correction out via
//               applySlew()
//
// The running discipline filter is emptied regardless of either flag.
void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
    assert(NULL != common_clock_);

    // The running filter is always discarded.
    filter_full_ = false;
    filter_wr_ = 0;

    if (position) {
        common_clock_->resetBasis();
        startup_filter_wr_ = 0;
    }

    if (frequency) {
        integrated_error_ = 0;
        last_delta_ = 0;
        last_delta_valid_ = false;

        // applySlew() reads correction_cur_, so it must be zeroed first.
        correction_cur_ = 0;
        applySlew();
    }
}
// Scales |val| by the ratio described by |gain| and returns the result
// saturated to the int32_t range.
//
// Returns 0 without calling the transform when the gain's numerator or
// denominator is zero, or when |val| itself is zero.  If the forward
// transform reports failure, the result saturates in the direction of
// |val|'s sign.  Every saturation is logged as a warning.
int32_t ClockRecoveryLoop::doGainScale(const LinearTransform& gain,
                                       int32_t val) {
    const bool nothing_to_scale = (gain.a_to_b_numer == 0) ||
                                  (gain.a_to_b_denom == 0) ||
                                  (val == 0);
    if (nothing_to_scale) {
        return 0;
    }

    int64_t input = static_cast<int64_t>(val);
    int64_t scaled;
    if (!gain.doForwardTransform(input, &scaled)) {
        ALOGW("Overflow/Underflow while scaling %d in %s",
              val, __PRETTY_FUNCTION__);
        return (val < 0) ? INT32_MIN : INT32_MAX;
    }

    if (scaled > INT32_MAX) {
        ALOGW("Overflow while scaling %d in %s", val, __PRETTY_FUNCTION__);
        return INT32_MAX;
    } else if (scaled < INT32_MIN) {
        ALOGW("Underflow while scaling %d in %s", val, __PRETTY_FUNCTION__);
        return INT32_MIN;
    }

    return static_cast<int32_t>(scaled);
}
// Pushes the current correction (correction_cur_) out to whichever clock is
// doing the slewing: the local clock when it supports slewing, otherwise the
// common clock's software recovery.
void ClockRecoveryLoop::applySlew() {
    if (local_clock_can_slew_) {
        local_clock_->setLocalSlew(correction_cur_);
        return;
    }

    // The SW clock recovery implemented by the common clock class expects
    // values expressed in PPM.  Linearly map the [correction_min,
    // correction_max] drive range onto +/- 100ppm: shift to a zero-based
    // offset, scale to 0..200, then re-center around zero.
    int sw_correction = correction_cur_ - pid_params_.correction_min;
    sw_correction = (sw_correction * 200) /
                    (pid_params_.correction_max - pid_params_.correction_min) -
                    100;

    common_clock_->setSlew(local_clock_->getLocalTime(), sw_correction);
}
} // namespace android