322 lines
12 KiB
C++
Raw Normal View History

/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* A service that exchanges time synchronization information between
* a master that defines a timeline and clients that follow the timeline.
*/
#define __STDC_LIMIT_MACROS
#define LOG_TAG "common_time"
#include <utils/Log.h>
#include <stdint.h>
#include <common_time/local_clock.h>
#include <assert.h>
#include "clock_recovery.h"
#include "common_clock.h"
#ifdef TIME_SERVICE_DEBUG
#include "diag_thread.h"
#endif
// LOG_TS is the log macro used for the per-event control loop trace in this
// file.  It normally maps to LOGV; defining TIME_SERVICE_DEBUG turns it into
// LOGE so the trace is emitted at error level when we are exclusively
// debugging this code.
#ifdef TIME_SERVICE_DEBUG
#define LOG_TS LOGE
#else
#define LOG_TS LOGV
#endif
namespace android {
// Builds a recovery loop around the supplied clocks.
//
// Both pointers must be non-NULL (asserted).  The constructor records them,
// probes whether the local clock can be slewed directly, resets both the
// position and frequency state of the loop and, when TIME_SERVICE_DEBUG is
// defined, creates and starts the diagnostic thread.
ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
                                     CommonClock* common_clock) {
    assert(NULL != local_clock);
    assert(NULL != common_clock);

    local_clock_ = local_clock;
    common_clock_ = common_clock;

    // The local clock is only considered slewable when its init check passes
    // and it accepts a slew of zero.  setLocalSlew() is not attempted at all
    // when the init check fails.
    if (local_clock_->initCheck()) {
        local_clock_can_slew_ = (OK == local_clock_->setLocalSlew(0));
    } else {
        local_clock_can_slew_ = false;
    }

    reset(true, true);

#ifdef TIME_SERVICE_DEBUG
    diag_thread_ = new DiagThread(common_clock_, local_clock_);
    if (diag_thread_ == NULL) {
        LOGW("Failed to allocate diagnostic thread.");
    } else if (diag_thread_->startWorkThread() != OK) {
        LOGW("Failed to start A@H clock recovery diagnostic thread.");
    }
#endif
}
// Tears down the recovery loop.  The only work done here is stopping the
// diagnostic work thread, and only in builds with TIME_SERVICE_DEBUG defined.
ClockRecoveryLoop::~ClockRecoveryLoop() {
#ifdef TIME_SERVICE_DEBUG
    // The constructor tolerates a failed allocation of the diagnostic thread
    // (it only logs a warning), so do not assume the pointer is valid here.
    // NOTE(review): if diag_thread_ is a raw pointer rather than a ref-counted
    // handle, the object is never released -- confirm against the header.
    if (diag_thread_ != NULL)
        diag_thread_->stopWorkThread();
#endif
}
// Constants.
//
// Tuning parameters for the control loop implemented in pushDisciplineEvent().
// Several of them are computed from the ones defined before them, so the
// definitions are kept in dependency order.

// Time step used by the PI control equation and by the free-run estimate of
// the error.  NOTE(review): presumably the nominal spacing of discipline
// events, in seconds -- confirm.
const float ClockRecoveryLoop::dT = 1.0;
// Gain applied to both error terms of the velocity form PI control equation.
const float ClockRecoveryLoop::Kc = 1.0f;
// Integral time: the PI equation weights the current error by (1 + dT/Ti).
const float ClockRecoveryLoop::Ti = 15.0f;
// Gain (< 1) applied to each computed change in CO before it is accumulated.
const float ClockRecoveryLoop::Tf = 0.05;
// Parameters of the smoothing filter which tracks the controller bias:
//   CObias = bias_Alpha * CO + (1 - bias_Alpha) * lastCObias
// bias_Alpha is derived from bias_RC, which in turn is derived from bias_Fc
// and dT.  NOTE(review): bias_Fc reads like a cutoff frequency -- confirm
// its units.
const float ClockRecoveryLoop::bias_Fc = 0.01;
const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
// If the magnitude of the measured error exceeds panic_thresh_ plus the RTT
// of the data point, pushDisciplineEvent() resets the frequency state of the
// loop and reports failure.  NOTE(review): units are those of common time
// deltas, presumably microseconds -- confirm.
const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
// Data points whose RTT is below control_thresh_ are always used for control;
// others are used only when they have the lowest RTT in the filter history.
const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
// Clamp limits for the controller output CO, which is expressed in ppm.
const float ClockRecoveryLoop::COmin = -100.0f;
const float ClockRecoveryLoop::COmax = 100.0f;
// Locking entry point for resetting the loop: takes lock_ for the duration of
// the call and hands both flags to reset_l(), which does the actual work.
void ClockRecoveryLoop::reset(bool position, bool frequency) {
    Mutex::Autolock guard(&lock_);

    reset_l(position, frequency);
}
// Returns the index of the entry with the smallest rtt among the first
// |count| entries of |data|.  When several entries share the smallest rtt the
// earliest one wins.  A |count| of 0 or 1 yields 0 without reading |data|.
uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
                                          uint32_t count) {
    uint32_t best = 0;

    for (uint32_t candidate = 1; candidate < count; ++candidate) {
        // Strictly-less comparison keeps the first of any tied entries.
        if (data[candidate].rtt < data[best].rtt) {
            best = candidate;
        }
    }

    return best;
}
bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
int64_t nominal_common_time,
int64_t rtt) {
Mutex::Autolock lock(&lock_);
int64_t local_common_time = 0;
common_clock_->localToCommon(local_time, &local_common_time);
int64_t raw_delta = nominal_common_time - local_common_time;
#ifdef TIME_SERVICE_DEBUG
LOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
local_common_time, nominal_common_time,
raw_delta, rtt);
#endif
// If we have not defined a basis for common time, then we need to use these
// initial points to do so. In order to avoid significant initial error
// from a particularly bad startup data point, we collect the first N data
// points and choose the best of them before moving on.
if (!common_clock_->isValid()) {
if (startup_filter_wr_ < kStartupFilterSize) {
DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_];
d.local_time = local_time;
d.nominal_common_time = nominal_common_time;
d.rtt = rtt;
startup_filter_wr_++;
}
if (startup_filter_wr_ == kStartupFilterSize) {
uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
kStartupFilterSize);
common_clock_->setBasis(
startup_filter_data_[min_rtt].local_time,
startup_filter_data_[min_rtt].nominal_common_time);
}
return true;
}
int64_t observed_common;
int64_t delta;
float delta_f, dCO;
int32_t correction_cur;
if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
// Since we just checked to make certain that this conversion was valid,
// and no one else in the system should be messing with it, if this
// conversion is suddenly invalid, it is a good reason to panic.
LOGE("Failed to convert local time to common time in %s:%d",
__PRETTY_FUNCTION__, __LINE__);
return false;
}
// Implement a filter which should match NTP filtering behavior when a
// client is associated with only one peer of lower stratum. Basically,
// always use the best of the N last data points, where best is defined as
// lowest round trip time. NTP uses an N of 8; we use a value of 6.
//
// TODO(johngro) : experiment with other filter strategies. The goal here
// is to mitigate the effects of high RTT data points which typically have
// large asymmetries in the TX/RX legs. Downside of the existing NTP
// approach (particularly because of the PID controller we are using to
// produce the control signal from the filtered data) are that the rate at
// which discipline events are actually acted upon becomes irregular and can
// become drawn out (the time between actionable event can go way up). If
// the system receives a strong high quality data point, the proportional
// component of the controller can produce a strong correction which is left
// in place for too long causing overshoot. In addition, the integral
// component of the system currently is an approximation based on the
// assumption of a more or less homogeneous sampling of the error. Its
// unclear what the effect of undermining this assumption would be right
// now.
// Two ideas which come to mind immediately would be to...
// 1) Keep a history of more data points (32 or so) and ignore data points
// whose RTT is more than a certain number of standard deviations outside
// of the norm.
// 2) Eliminate the PID controller portion of this system entirely.
// Instead, move to a system which uses a very wide filter (128 data
// points or more) with a sum-of-least-squares line fitting approach to
// tracking the long term drift. This would take the place of the I
// component in the current PID controller. Also use a much more narrow
// outlier-rejector filter (as described in #1) to drive a short term
// correction factor similar to the P component of the PID controller.
assert(filter_wr_ < kFilterSize);
filter_data_[filter_wr_].local_time = local_time;
filter_data_[filter_wr_].observed_common_time = observed_common;
filter_data_[filter_wr_].nominal_common_time = nominal_common_time;
filter_data_[filter_wr_].rtt = rtt;
filter_data_[filter_wr_].point_used = false;
uint32_t current_point = filter_wr_;
filter_wr_ = (filter_wr_ + 1) % kFilterSize;
if (!filter_wr_)
filter_full_ = true;
uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
// We only use packets with low RTTs for control. If the packet RTT
// is less than the panic threshold, we can probably eat the jitter with the
// control loop. Otherwise, take the packet only if it better than all
// of the packets we have in the history. That way we try to track
// something, even if it is noisy.
if (current_point == min_rtt || rtt < control_thresh_) {
delta_f = delta = nominal_common_time - observed_common;
// Compute the error then clamp to the panic threshold. If we ever
// exceed this amt of error, its time to panic and reset the system.
// Given that the error in the measurement of the error could be as
// high as the RTT of the data point, we don't actually panic until
// the implied error (delta) is greater than the absolute panic
// threashold plus the RTT. IOW - we don't panic until we are
// absoluely sure that our best case sync is worse than the absolute
// panic threshold.
int64_t effective_panic_thresh = panic_thresh_ + rtt;
if ((delta > effective_panic_thresh) ||
(delta < -effective_panic_thresh)) {
// PANIC!!!
reset_l(false, true);
return false;
}
} else {
// We do not have a good packet to look at, but we also do not want to
// free-run the clock at some crazy slew rate. So we guess the
// trajectory of the clock based on the last controller output and the
// estimated bias of our clock against the master.
// The net effect of this is that CO == CObias after some extended
// period of no feedback.
delta_f = last_delta_f_ - dT*(CO - CObias);
delta = delta_f;
}
// Velocity form PI control equation.
dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
CO += dCO * Tf; // Filter CO by applying gain <1 here.
// Save error terms for later.
last_delta_f_ = delta_f;
last_delta_ = delta;
// Clamp CO to +/- 100ppm.
if (CO < COmin)
CO = COmin;
else if (CO > COmax)
CO = COmax;
// Update the controller bias.
CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
lastCObias = CObias;
// Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
// don't get fp weirdness.
correction_cur = CO * 327.66;
// If there was a change in the amt of correction to use, update the
// system.
if (correction_cur_ != correction_cur) {
correction_cur_ = correction_cur;
applySlew();
}
LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, correction_cur);
#ifdef TIME_SERVICE_DEBUG
diag_thread_->pushDisciplineEvent(
local_time,
observed_common,
nominal_common_time,
correction_cur,
rtt);
#endif
return true;
}
Implement new common_time service functionality. Major re-factor of the common_time (formerly aah_timesrv) service in preparation for up-integration into Android master. This work includes bug fixes, new features, and general code cleanup. High points are listed below. + CommonClock interface has been enhanced to allow querying of many more low level synchronization details; mostly for debugging, but in theory useful to an application as well. + CommonTimeConfig interface has been implemented. This allows a management process to configure a number of different parameters (many of them new) to control the behavior of the common_time service. Most importantly, the time service can be bound to a specific network interface and should only operate on that interface and no others. + Enhance log messages to be more useful in determining what the time service state machine is doing and why. + Enhance information provided by dumpsys to provide many more details about the quality of time sync and the network conditions which gave rise to the current quality conditions. Features, features, features.... + Add a feature which lets the high level choose a different master election endpoint so that multiple time synchronization domains can co-exist on the same subnet (mostly to support a potential use case of multiple home domains in a multiple dwelling environment like a hotel, dormitory or apartment complex). + Add a feature which lets the high level assign a 64-bit group ID which allows partitioning of time synchronization domains even when the master election endpoint is shared (as it might be if broadcast is being used instead of multicast) + Add an auto-disable feature which lets the time service drop into network-less mode when there are no active clients of the common_time service in the device. Mostly for phones, this allows phones to not consume network/battery resources when they don't need to maintain common time. 
+ Add a feature which lets the high level choose the priority of the common_time service in the master election protocol. This allows high level decisions about things like mobile vs non-mobile, wired ethernet vs WiFi to affect who ends up with the job of master on a given network. Priority overrides at the low level also allow clients coming in from network-less mode to lower their effective priority as they join a new network so as to not disrupt any stable long-running timeline which may already be active on the network. + Add the ability to control some of the core parameters of the time sync service which affect network load (like the sync polling interval and the master announce interval) Change-Id: I71af15a83cfa5ef0417b406928967fb9e02f55c6
2012-01-20 12:12:59 -08:00
// Reports the most recently saved error term of the control loop, or
// ICommonClock::kErrorEstimateUnknown when no valid term is available (the
// valid flag is cleared by a frequency reset).  Takes lock_ while reading.
// NOTE(review): the result is narrowed to int32_t on return if last_delta_ is
// a wider type -- confirm against the header.
int32_t ClockRecoveryLoop::getLastErrorEstimate() {
    Mutex::Autolock guard(&lock_);

    if (!last_delta_valid_)
        return ICommonClock::kErrorEstimateUnknown;

    return last_delta_;
}
// Resets the state of the recovery loop.
//
//   position  - when true, discard the common clock basis and restart the
//               startup filter.
//   frequency - when true, zero the controller state (error terms, CO and
//               its bias, current correction) and apply the resulting zero
//               slew.
//
// The data point filter history is cleared on every call, regardless of the
// flags.  Callers in this file invoke this with lock_ held.
void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
    assert(NULL != common_clock_);

    if (position) {
        common_clock_->resetBasis();
        startup_filter_wr_ = 0;
    }

    if (frequency) {
        // Controller output and bias tracking.
        CO = 0.0f;
        CObias = 0.0f;
        lastCObias = 0.0f;
        correction_cur_ = 0;

        // Saved error terms.
        last_delta_f_ = 0.0f;
        last_delta_ = 0;
        last_delta_valid_ = false;

        // Push the zeroed correction out to whichever clock is being slewed.
        applySlew();
    }

    filter_full_ = false;
    filter_wr_ = 0;
}
// Pushes the current correction to the clock hardware or, failing that, to
// the software common clock.
//
// When the local clock can be slewed directly it is handed correction_cur_.
// Otherwise the common clock is slewed in software, starting at the current
// local time, using CO.
void ClockRecoveryLoop::applySlew() {
    if (!local_clock_can_slew_) {
        // The SW clock recovery implemented by the common clock class expects
        // values expressed in PPM.  CO is in ppm.
        common_clock_->setSlew(local_clock_->getLocalTime(), CO);
        return;
    }

    local_clock_->setLocalSlew(correction_cur_);
}
} // namespace android