Details

    • Type: Improvement
    • Status: Closed
    • Priority: Minor
    • Resolution: Won't Fix
    • Affects Version/s: ejabberd 1.1.3
    • Fix Version/s: None
    • Component/s: XMPP Compliance
    • Labels:
      None

      Description

      According to XEP-136, server implementation must allow the client to replicate archive, i.e. get all changes in archive stored on server side since last synchronization. This feature is very important for proper client support implementation, as it allows faster operations (using locally cached archive) as well as searches through the archive.

      Here is the patch to mod_archive that adds preliminary support for this feature. I haven't tested all possible scenarios yet, but examples listed in XEP-136 seem to be processed correctly.

      Besides of that, there are two bugs solved in this patch:

      1) In current implementation values for 'save' attribute are either 'true' or 'false', but according to XEP-136, they have to be one of 'false', 'body', 'message', 'stream' - thus, 'true' was changed to 'body' ('message' and 'stream' seem to be rarely needed so their support is not that important).

      2) There were 2 errors in RSM implementation, which prevented it from functioning on significant number of examples. To verify the error and check the fix you can use RSM examples similar to those listed in XEP-136: before the fix, for "after" request the implementation gave incorrect results and "before" one just resulted in internal error, the fix seem to solve all issues I was able to spot (tested on collections listing, collections retrieval and "modified" requests).

      Patch is for https://svn.process-one.net/ejabberd-modules/mod_archive/trunk/src/mod_archive.erl

      Index: mod_archive.erl
      ===================================================================
      — mod_archive.erl (revision 165)
      +++ mod_archive.erl (working copy)
      @@ -71,6 +71,14 @@
      message_list = [],
      subject = ""}).

      +-record(archive_changes,
      +

      {usjs, + us, + jid, + start, + change_type, + change_time}

      ).
      +
      -record(msg,

      {direction, secs, body}

      ).

      %%====================================================================
      @@ -121,6 +129,9 @@
      mnesia:create_table(archive_message,
      [

      {disc_copies, [node()]},
      {attributes, record_info(fields, archive_message)}]),
      + mnesia:create_table(archive_changes,
      + [{disc_copies, [node()]}

      ,
      +

      {attributes, record_info(fields, archive_changes)}

      ]),
      % mnesia:add_table_index(archive_options, us),
      mnesia:add_table_index(archive_message, us),
      ejabberd_hooks:add(remove_user, Host, ?MODULE, remove_user, 50),
      @@ -243,6 +254,7 @@
      "retrieve" -> process_local_iq_retrieve(From, To, IQ);
      "save" -> process_local_iq_store(From, To, IQ);
      "remove" -> process_local_iq_remove(From, To, IQ);
      + "modified" -> process_local_iq_modified(From, To, IQ);
      _ -> IQ#iq

      {type = error, sub_el = [SubEl, ?ERR_FEATURE_NOT_IMPLEMENTED]}

      end
      @@ -256,6 +268,11 @@
      F = fun() ->
      lists:foreach(
      fun(R) ->
      + CL = #archive_changes

      {usjs = R#archive_message.usjs, + us = R#archive_message.us, + jid = R#archive_message.jid, + start = R#archive_message.start}

      ,
      + mnesia:delete_object(CL),
      mnesia:delete_object(R)
      end,
      mnesia:index_read(archive_message, US, #archive_message.us)),
      @@ -323,7 +340,14 @@
      message_list = [Message]};
      [E] -> E#archive_message

      {message_list=lists:append(E#archive_message.message_list, [Message])}

      end,

      • mnesia:write(NE)
        + mnesia:write(NE),
        + CL = #archive_changes {usjs = NE#archive_message.usjs, + us = NE#archive_message.us, + jid = NE#archive_message.jid, + start = NE#archive_message.start, + change_type = changed, + change_time = Tm}

        ,
        + mnesia:write(CL)
        end),
        NStorages.

      @@ -419,7 +443,11 @@
      AutoSave -> "true";
      true -> "false"
      end,

      • [
        Unknown macro: {xmlelement, "default", [{"save", "false"}, {"otr", "forbid"}], []},
        + SaveMode = if
        + AutoSave -> "body";
        + true -> "false"
        + end,
        + [{xmlelement, "default", [{"save", SaveMode}, {"otr", "forbid"}], []}

        ,

        Unknown macro: {xmlelement, "auto", [{"save", SaveAttr}], []}

        ].

      @@ -452,7 +480,7 @@
      % parse the list of xml element, and modify the given archive_option
      transaction_parse_save_elem(Options, [

      {xmlelement, "default", Attrs, _}

      | Tail]) ->
      V = case xml:get_attr_s("save", Attrs) of

      • "true" -> true;
        + "body" -> true;
        "false" -> false;
        _ -> unset
        end,
        @@ -596,7 +624,16 @@
        case parse_store_element (LUser, LServer, SubEl) of
        {error, E} -> IQ#iq{type = error, sub_el = [SubEl, E]};
        Collection ->
        - case mnesia:transaction(fun() -> mnesia:write(Collection) end) of
        + case mnesia:transaction(fun() ->
        + mnesia:write(Collection),
        + CL = #archive_changes{usjs = Collection#archive_message.us, + us = Collection#archive_message.us, + jid = Collection#archive_message.jid, + start = Collection#archive_message.start, + change_type = changed, + change_time = get_timestamp()},
        + mnesia:write(CL)
        + end) of
        {atomic, _} ->
        IQ#iq{type = result, sub_el=[]};
        _ ->
        @@ -778,7 +815,16 @@
        end.

        process_remove_index(Index) ->
        - case mnesia:transaction(fun() -> mnesia:delete({archive_message, Index}) end) of
        + case mnesia:transaction(fun() ->
        + {LUser, LServer, Jid, Start} = Index,
        + CL = #archive_changes{usjs = Index,
        + us = {LUser, LServer},
        + jid = Jid,
        + start = Start,
        + change_type = removed,
        + change_time = get_timestamp()},
        + mnesia:write(CL),
        + mnesia:delete({archive_message, Index}) end) of
        {atomic, _} ->
        ok;
        {aborted, _} ->
        @@ -791,7 +837,16 @@
        start='$1', message_list='', subject = ''},
        Guard = [{'>=', '$1', Start},{'<', '$1', End}],

        - lists:foreach(fun(R) -> mnesia:delete_object(R) end,
        + lists:foreach(fun(R) ->
        + CL = #archive_changes{usjs = R#archive_message.usjs, + us = R#archive_message.us, + jid = R#archive_message.jid, + start = R#archive_message.start, + change_type = removed, + change_time = get_timestamp()},
        + mnesia:write(CL),
        + mnesia:delete_object(R)
        + end,
        mnesia:select(archive_message, [{Pat, Guard, ['$_']}]))
        end,

        @@ -819,6 +874,24 @@
        {error, ?ERRT_INTERNAL_SERVER_ERROR("", "plop")}
        end.

        +% return {ok, [{#archive_message}]} or {error, xmlelement}
        +% With is a tuple Jid, or '_'
        +get_modified(LUser, LServer, Start, End, With) ->
        + case mnesia:transaction(fun() ->
        + Pat = #archive_changes{usjs= '_',
        + us = {LUser, LServer},
        + jid= With,
        + start='$1',
        + change_type='_',
        + change_time='_'},
        + Guard = [{'>=', '$1', Start},{'<', '$1', End}]
        ,
        + mnesia:select(archive_changes, [{Pat, Guard, ['$_']}])
        + end) of
        + {atomic, Result} ->
        + {ok, Result};
        + {aborted, _} ->
        + {error, ?ERRT_INTERNAL_SERVER_ERROR("", "plop")}
        + end.

        % Index is {LUser, LServer, With, Start}
        get_collection(Index) ->
        @@ -832,7 +905,67 @@



        +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        +%% 10. Replication
        +%%

        +
        +process_local_iq_modified(From, _To, #iq{type = Type, sub_el = SubEl} = IQ) ->
        + #jid{luser = LUser, lserver = LServer} = From,
        + case Type of
        + set ->
        + IQ#iq{type = error, sub_el = [SubEl, ?ERR_NOT_ALLOWED]};
        + get ->
        + {xmlelement, _, Attrs, SubEls} = SubEl,
        + RSM = parse_rsm(SubEls),
        + ?MYDEBUG("RSM Results: ~p ~n", [RSM]),
        + Result = case parse_root_argument(Attrs) of
        + {error, E}

        ->

        {error, E}

        ;
        +

        {interval, Start, Stop}

        ->
        + get_modified(LUser, LServer, Start, Stop, '_');
        +

        {interval, Start, Stop, Jid}

        ->
        + get_modified(LUser, LServer, Start, Stop, Jid);
        +

        {index, Jid, Start}

        ->
        + get_modified(LUser, LServer, Start, infinity, Jid);
        + _ ->

        {error, ?ERR_BAD_REQUEST}

        + end,
        + case Result of
        +

        {ok, Items}

        ->
        + Zero = calendar:datetime_to_gregorian_seconds(1970, 1, 1}, {0, 0, 0),
        + FunId = fun(El) ->
        + Secs = element(7,El) - Zero,
        + jlib:now_to_utc_string(

        {Secs div 1000000, Secs rem 1000000, 0}

        )
        + end,
        + FunCompare = fun(Id, El) ->
        +

        {MegaSecs1, Secs1, _}

        = jlib:datetime_string_to_timestamp(Id),
        + Id1 = MegaSecs1 * 1000000 + Secs1 - Zero,
        +

        {MegaSecs2, Secs2, _}

        = jlib:datetime_string_to_timestamp(FunId(El)),
        + Id2 = MegaSecs2 * 1000000 + Secs2 - Zero,
        + if Id1 == Id2 -> equal;
        + Id1 > Id2 -> greater;
        + Id1 < Id2 -> smaller
        + end
        + end,
        + case catch execute_rsm(RSM, lists:keysort(7, Items), FunId,FunCompare) of
        +

        {error, R} ->
        + IQ#iq{type = error, sub_el = [SubEl, R]};
        + {'EXIT', Errr} ->
        + ?MYDEBUG("INTERNAL ERROR ~p ~n", [Errr]),
        + IQ#iq{type = error, sub_el = [SubEl, ?ERR_INTERNAL_SERVER_ERROR]};
        + {RSM_Elem, Items2} ->
        + Fun = fun(A) ->
        + Seconds= A#archive_changes.start - Zero,
        + Start2 = jlib:now_to_utc_string({Seconds div 1000000, Seconds rem 1000000, 0}),
        + Args = [{"with", jlib:jid_to_string(A#archive_changes.jid)}, {"start", Start2}],
        + {xmlelement, atom_to_list(A#archive_changes.change_type), Args, []}
        + end,
        + IQ#iq{type = result, sub_el = [{xmlelement, "modified", [{"xmlns", ?NS_ARCHIVE}], lists:append(lists:map(Fun, Items2),[RSM_Elem])}]}
        + end;
        + {error, R}

        ->
        + IQ#iq

        {type = error, sub_el = [SubEl, R]}

        + end
        + end.
        +
        %%%%%%%%%%%%%%%%%%%%%%%%%%
        % Utility

      @@ -1029,7 +1162,7 @@
      {index,
      case S of

      {id, IdentIndex}

      ->

      • integer_to_list(length(List) - list_to_integer(IdentIndex));
        + {id, integer_to_list(length(List) - list_to_integer(IdentIndex))}

        ;
        _ -> S
        end};
        _ ->
        @@ -1052,9 +1185,9 @@
        execute_rsm_aux({{id,I}, M, normal} = RSM, [E | Tail], IdFun, Acc) ->
        case IdFun(I, E) of
        smaller ->

      • execute_rsm_aux(RSM, Tail, IdFun, Acc + 1);
        + execute_rsm_aux( {0, M, normal}, [E | Tail], IdFun, Acc);
        _ ->
        - execute_rsm_aux({0, M, normal}

        , [E | Tail], IdFun, Acc)
        + execute_rsm_aux(RSM, Tail, IdFun, Acc + 1)
        end;

      execute_rsm_aux({{id,_}, _, normal}, [], _, Acc) ->

        Activity

        Hide
        mremond@process-one.net Mickaël Rémond added a comment -

        Thank you, Alexander.
        We will review your patch for integration.

        Show
        mremond@process-one.net Mickaël Rémond added a comment - Thank you, Alexander. We will review your patch for integration.
        Hide
        mremond@process-one.net Mickaël Rémond added a comment -

        A new mod_archive_sql.erl doe not exist along with mod_archive.erl (mnesia).
        We need to have both version at the same level before commit.

        Show
        mremond@process-one.net Mickaël Rémond added a comment - A new mod_archive_sql.erl doe not exist along with mod_archive.erl (mnesia). We need to have both version at the same level before commit.
        Hide
        ndl Alexander Tsvyashchenko added a comment -

        With the mod_archive_odbc release (http://www.ndl.kiev.ua/typo/articles/2007/11/14/mod_archive_odbc-release) this submission is no longer relevant as mod_archive_odbc is more complete implementation and, thus, makes this patch unnecessary.

        Show
        ndl Alexander Tsvyashchenko added a comment - With the mod_archive_odbc release ( http://www.ndl.kiev.ua/typo/articles/2007/11/14/mod_archive_odbc-release ) this submission is no longer relevant as mod_archive_odbc is more complete implementation and, thus, makes this patch unnecessary.
        Hide
        mremond@process-one.net Mickaël Rémond added a comment -

        Hello,

        How does it relate to mod_archive sql version in SVN (ejabberd-modules) ?

        https://svn.process-one.net/ejabberd-modules/mod_archive/trunk/

        Show
        mremond@process-one.net Mickaël Rémond added a comment - Hello, How does it relate to mod_archive sql version in SVN (ejabberd-modules) ? https://svn.process-one.net/ejabberd-modules/mod_archive/trunk/
        Hide
        mremond@process-one.net Mickaël Rémond added a comment -

        Comment from Alexander Tsvyashchenko using another email address, so was not added to the ticket):

        Hi Mickaël,

        Please see
        http://www.ndl.kiev.ua/typo/articles/2007/11/14/mod_archive_odbc-release
        article for detailed description of the differences.

        Basically, mod_archive_sql version has the same drawbacks as mod_archive:

        1) Poor architecture when processing RSM-based queries which makes it
        hard to work with large datasets - it's better there than in
        mod_archive as at least it does not fetch all messages, only all
        collections when answering queries, but still this can be quite
        expensive & prohibitedly expensive for really large datasets.

        2) Lack of many XEP-136 parts, the most important being replication,
        but there were quite some other incompartibilities scattered here and
        there.

        3) Besides of that, to the best of my understanding mod_archive_sql
        works only with PostgreSQL, as it's tied to its features and syntax in
        code.

        As I stated it in description, mod_archive_odbc was based on
        mod_archive and mod_archive_sql, so it can be considered to be derived
        from mod_archive_sql, but with fixes to all the problems mentioned
        above. If that is ok for ejabberd developers I would suggest to
        replace mod_archive_sql by mod_archive_odbc, as it should be superior
        in all aspects to mod_archive_sql - but this probably
        should be discussed in more depth with the author of mod_archive_sql, and
        some
        additional testing should be done, as I haven't tested
        mod_archive_odbc with PostgreSQL.

        As for the mnesia-based mod_archive, after gathering some experience
        while working on mod_archive_odbc I'm even more sure now that
        currently original, mnesia-based mod_archive just has no future -
        there's currently no way to implement it on mnesia efficiently enough.
        Some hacks can be done, of course, but it's just not worth the effort,
        so I would suggest to consider seriously leaving only SQL-based
        version in ejabberd.

        Good luck! Alexander

        Show
        mremond@process-one.net Mickaël Rémond added a comment - Comment from Alexander Tsvyashchenko using another email address, so was not added to the ticket): Hi Mickaël, Please see http://www.ndl.kiev.ua/typo/articles/2007/11/14/mod_archive_odbc-release article for detailed description of the differences. Basically, mod_archive_sql version has the same drawbacks as mod_archive: 1) Poor architecture when processing RSM-based queries which makes it hard to work with large datasets - it's better there than in mod_archive as at least it does not fetch all messages, only all collections when answering queries, but still this can be quite expensive & prohibitedly expensive for really large datasets. 2) Lack of many XEP-136 parts, the most important being replication, but there were quite some other incompartibilities scattered here and there. 3) Besides of that, to the best of my understanding mod_archive_sql works only with PostgreSQL, as it's tied to its features and syntax in code. As I stated it in description, mod_archive_odbc was based on mod_archive and mod_archive_sql, so it can be considered to be derived from mod_archive_sql, but with fixes to all the problems mentioned above. If that is ok for ejabberd developers I would suggest to replace mod_archive_sql by mod_archive_odbc, as it should be superior in all aspects to mod_archive_sql - but this probably should be discussed in more depth with the author of mod_archive_sql, and some additional testing should be done, as I haven't tested mod_archive_odbc with PostgreSQL. As for the mnesia-based mod_archive, after gathering some experience while working on mod_archive_odbc I'm even more sure now that currently original, mnesia-based mod_archive just has no future - there's currently no way to implement it on mnesia efficiently enough. Some hacks can be done, of course, but it's just not worth the effort, so I would suggest to consider seriously leaving only SQL-based version in ejabberd. Good luck! Alexander

          People

          • Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

            • Created:
              Updated:
              Resolved:

              Development