commit 2e5ec371e455f3df471062d198d4ce5117f731b5
Author: Ben Wiederhake <BenWiederhake.GitHub@gmx.de>
Date: Fri May 24 00:14:45 2024 +0000
diff --git a/bridge/config/config.go b/bridge/config/config.go
index 18c6092..75792ed 100644
--- a/bridge/config/config.go
+++ b/bridge/config/config.go
@@ -1216 +1217 @@ type Protocol struct {
MessageLength int // IRC, max length of a message allowed
MessageQueue int // IRC, size of message queue for flood control
MessageSplit bool // IRC, split long messages with newlines on MessageLength instead of clipping
+ MessageSplitMaxCount int // discord, split long messages into at most this many messages instead of clipping (MessageLength=1950 cannot be configured)
Muc string // xmpp
MxID string // matrix
Name string // all protocols
diff --git a/bridge/discord/discord.go b/bridge/discord/discord.go
index 9bac021..2707ad2 100644
--- a/bridge/discord/discord.go
+++ b/bridge/discord/discord.go
@@ -3166 +3167 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st
// Upload a file if it exists
if msg.Extra != nil {
for _, rmsg := range helper.HandleExtra(msg, b.General) {
+ // TODO: Use ClipOrSplitMessage
rmsg.Text = helper.ClipMessage(rmsg.Text, MessageLength, b.GetString("MessageClipped"))
if _, err := b.c.ChannelMessageSend(channelID, rmsg.Username+rmsg.Text); err != nil {
b.Log.Errorf("Could not send message %#v: %s", rmsg, err)
@@ -32735 +32853 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st
}
}
- msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped"))
- msg.Text = b.replaceUserMentions(msg.Text)
-
// Edit message
if msg.ID != "" {
- _, err := b.c.ChannelMessageEdit(channelID, msg.ID, msg.Username+msg.Text)
- return msg.ID, err
+ // Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";".
+ var msgIds = strings.Split(msg.ID, ";")
+ msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds))
+ for len(msgParts) < len(msgIds) {
+ msgParts = append(msgParts, "((obsoleted by edit))")
+ }
+ for i := range msgParts {
+ // In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates.
+ // TODO: Optimize away noop-updates of un-edited messages
+ // TODO: Use RemoteNickFormat instead of this broken concatenation
+ _, err := b.c.ChannelMessageEdit(channelID, msgIds[i], msg.Username+msgParts[i])
+ if err != nil {
+ return "", err
+ }
+ }
+ return msg.ID, nil
}
- m := discordgo.MessageSend{
- Content: msg.Username + msg.Text,
- AllowedMentions: b.getAllowedMentions(),
- }
+ msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount"))
+ var msgIds = []string{}
- if msg.ParentValid() {
- m.Reference = &discordgo.MessageReference{
- MessageID: msg.ParentID,
- ChannelID: channelID,
- GuildID: b.guildID,
+ for _, msgPart := range msgParts {
+ m := discordgo.MessageSend{
+ Content: msg.Username + msgPart,
+ AllowedMentions: b.getAllowedMentions(),
}
- }
- // Post normal message
- res, err := b.c.ChannelMessageSendComplex(channelID, &m)
- if err != nil {
- return "", err
+ if msg.ParentValid() {
+ m.Reference = &discordgo.MessageReference{
+ MessageID: msg.ParentID,
+ ChannelID: channelID,
+ GuildID: b.guildID,
+ }
+ }
+
+ // Post normal message
+ res, err := b.c.ChannelMessageSendComplex(channelID, &m)
+ if err != nil {
+ return "", err
+ }
+ msgIds = append(msgIds, res.ID)
}
- return res.ID, nil
+ // Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";".
+ return strings.Join(msgIds, ";"), nil
}
// handleUploadFile handles native upload of files
diff --git a/bridge/discord/webhook.go b/bridge/discord/webhook.go
index b518ea6..4e647b3 100644
--- a/bridge/discord/webhook.go
+++ b/bridge/discord/webhook.go
@@ -26 +27 @@ package bdiscord
import (
"bytes"
+ "strings"
"github.com/42wim/matterbridge/bridge/config"
"github.com/42wim/matterbridge/bridge/helper"
@@ -4213 +4365 @@ func (b *Bdiscord) maybeGetLocalAvatar(msg *config.Message) string {
return ""
}
+// webhookSendTextOnly posts msg.Text to channelID through the webhook transmitter.
+//
+// The text is first cut into parts by helper.ClipOrSplitMessage, using
+// MessageLength and the configured MessageClipped / MessageSplitMaxCount values.
+// Each part is sent as its own webhook message carrying msg.Username and
+// msg.Avatar.
+//
+// On success it returns the IDs of all sent messages joined with ";". On the
+// first failed send it returns "" and that error; parts that were already sent
+// are not rolled back.
+func (b *Bdiscord) webhookSendTextOnly(msg *config.Message, channelID string) (string, error) {
+	msgParts := helper.ClipOrSplitMessage(msg.Text, MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount"))
+	// One ID is collected per part, so the final size is known up front.
+	msgIDs := make([]string, 0, len(msgParts))
+	for _, msgPart := range msgParts {
+		res, err := b.transmitter.Send(
+			channelID,
+			&discordgo.WebhookParams{
+				Content:         msgPart,
+				Username:        msg.Username,
+				AvatarURL:       msg.Avatar,
+				AllowedMentions: b.getAllowedMentions(),
+			},
+		)
+		if err != nil {
+			return "", err
+		}
+		msgIDs = append(msgIDs, res.ID)
+	}
+	// Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";".
+	return strings.Join(msgIDs, ";"), nil
+}
+
+// webhookSendFilesOnly uploads every entry of msg.Extra["file"] to channelID,
+// one webhook message per file, using the file's Comment as message content.
+//
+// It stops at the first failed upload, logs it and returns the error. It
+// returns nil once all files have been sent (or when there are none).
+func (b *Bdiscord) webhookSendFilesOnly(msg *config.Message, channelID string) error {
+	attachments := msg.Extra["file"]
+	for idx := range attachments {
+		info := attachments[idx].(config.FileInfo)
+		// ContentType is left at its zero value.
+		file := discordgo.File{
+			Name:   info.Name,
+			Reader: bytes.NewReader(*info.Data),
+		}
+		params := discordgo.WebhookParams{
+			Content:         info.Comment,
+			Username:        msg.Username,
+			AvatarURL:       msg.Avatar,
+			Files:           []*discordgo.File{&file},
+			AllowedMentions: b.getAllowedMentions(),
+		}
+
+		// The ID of the sent message is deliberately dropped: it cannot be used
+		// for edits. It will be needed once message deletion is implemented.
+		if _, err := b.transmitter.Send(channelID, &params); err != nil {
+			b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err)
+			return err
+		}
+	}
+	return nil
+}
+
// webhookSend send one or more message via webhook, taking care of file
// uploads (from slack, telegram or mattermost).
// Returns messageID and error.
-func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordgo.Message, error) {
+func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (string, error) {
var (
- res *discordgo.Message
- res2 *discordgo.Message
+ res string
err error
)
@@ -6148 +11411 @@ func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordg
// We can't send empty messages.
if msg.Text != "" {
- res, err = b.transmitter.Send(
- channelID,
- &discordgo.WebhookParams{
- Content: msg.Text,
- Username: msg.Username,
- AvatarURL: msg.Avatar,
- AllowedMentions: b.getAllowedMentions(),
- },
- )
- if err != nil {
- b.Log.Errorf("Could not send text (%s) for message %#v: %s", msg.Text, msg, err)
- }
+ res, err = b.webhookSendTextOnly(msg, channelID)
}
- if msg.Extra != nil {
- for _, f := range msg.Extra["file"] {
- fi := f.(config.FileInfo)
- file := discordgo.File{
- Name: fi.Name,
- ContentType: "",
- Reader: bytes.NewReader(*fi.Data),
- }
- content := fi.Comment
-
- res2, err = b.transmitter.Send(
- channelID,
- &discordgo.WebhookParams{
- Username: msg.Username,
- AvatarURL: msg.Avatar,
- Files: []*discordgo.File{&file},
- Content: content,
- AllowedMentions: b.getAllowedMentions(),
- },
- )
- if err != nil {
- b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err)
- }
- }
- }
-
- if msg.Text == "" {
- res = res2
+ if err == nil && msg.Extra != nil {
+ err = b.webhookSendFilesOnly(msg, channelID)
}
return res, err
@@ -12035 +13644 @@ func (b *Bdiscord) handleEventWebhook(msg *config.Message, channelID string) (st
return "", nil
}
- msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped"))
- msg.Text = b.replaceUserMentions(msg.Text)
// discord username must be [0..32] max
if len(msg.Username) > 32 {
msg.Username = msg.Username[0:32]
}
if msg.ID != "" {
+ // Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";".
+ var msgIds = strings.Split(msg.ID, ";")
+ msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds))
+ for len(msgParts) < len(msgIds) {
+ msgParts = append(msgParts, "((obsoleted by edit))")
+ }
b.Log.Debugf("Editing webhook message")
- err := b.transmitter.Edit(channelID, msg.ID, &discordgo.WebhookParams{
- Content: msg.Text,
- Username: msg.Username,
- AllowedMentions: b.getAllowedMentions(),
- })
- if err == nil {
+ var edit_err error = nil
+ for i := range msgParts {
+ // In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates.
+ // TODO: Optimize away noop-updates of un-edited messages
+ edit_err = b.transmitter.Edit(channelID, msgIds[i], &discordgo.WebhookParams{
+ Content: msgParts[i],
+ Username: msg.Username,
+ AllowedMentions: b.getAllowedMentions(),
+ })
+ if edit_err != nil {
+ break
+ }
+ }
+ if edit_err == nil {
return msg.ID, nil
}
- b.Log.Errorf("Could not edit webhook message: %s", err)
+ b.Log.Errorf("Could not edit webhook message(s): %s; sending as new message(s) instead", edit_err)
}
b.Log.Debugf("Processing webhook sending for message %#v", msg)
- discordMsg, err := b.webhookSend(msg, channelID)
+ msg.Text = b.replaceUserMentions(msg.Text)
+ msgId, err := b.webhookSend(msg, channelID)
if err != nil {
- b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msg, err)
+ b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msgId, err)
return "", err
}
- if discordMsg == nil {
- return "", nil
- }
-
- return discordMsg.ID, nil
+ return msgId, nil
}
diff --git a/bridge/helper/helper.go b/bridge/helper/helper.go
index d6488af..d968f4d 100644
--- a/bridge/helper/helper.go
+++ b/bridge/helper/helper.go
@@ -2296 +22933 @@ func ClipMessage(text string, length int, clippingMessage string) string {
return text
}
+// ClipOrSplitMessage breaks text into parts of at most length bytes each.
+//
+// Up to splitMax-1 leading parts are cut off the front of text without dropping
+// any bytes. Whatever remains afterwards is passed through ClipMessage (with the
+// same length and clippingMessage) and becomes the final part. The result
+// therefore always holds at least one element, and a splitMax of 0 or 1 yields
+// exactly one element.
+//
+// A cut is moved back by up to three bytes so that a part does not end in the
+// middle of a multi-byte UTF-8 sequence.
+func ClipOrSplitMessage(text string, length int, clippingMessage string, splitMax int) []string {
+	var msgParts []string
+	remainingText := text
+	// Invariant of this splitting loop: no text is lost (msgParts+remainingText is the original text),
+	// and every part is guaranteed to satisfy the length requirement.
+	for len(msgParts) < splitMax-1 && len(remainingText) > length {
+		// Decision: the text needs to be split (again).
+		var chunk string
+		// The longest UTF-8 encoding of a valid rune is utf8.UTFMax (4) bytes,
+		// so we should never need to waste 4 or more bytes at a time.
+		for wasted := 0; wasted < utf8.UTFMax && wasted < length; wasted++ {
+			chunk = remainingText[:length-wasted]
+			// Only a width of 1 signals an invalid tail. A correctly encoded U+FFFD
+			// also decodes to utf8.RuneError, but with its real width, and must not
+			// be mistaken for a broken sequence.
+			if r, size := utf8.DecodeLastRuneInString(chunk); r != utf8.RuneError || size > 1 {
+				break
+			}
+		}
+		// Note: at this point, "chunk" might still be invalid, if "text" is very broken.
+		msgParts = append(msgParts, chunk)
+		remainingText = remainingText[len(chunk):]
+	}
+	return append(msgParts, ClipMessage(remainingText, length, clippingMessage))
+}
+
// ParseMarkdown takes in an input string as markdown and parses it to html
func ParseMarkdown(input string) string {
extensions := parser.HardLineBreak | parser.NoIntraEmphasis | parser.FencedCode
diff --git a/bridge/helper/helper_test.go b/bridge/helper/helper_test.go
index f21a4bd..739ece9 100644
--- a/bridge/helper/helper_test.go
+++ b/bridge/helper/helper_test.go
@@ -1343 +134105 @@ func TestConvertWebPToPNG(t *testing.T) {
t.Fail()
}
}
+
+// clippingOrSplittingTestCases is the table consumed by TestClipOrSplitMessage,
+// keyed by a human-readable case name. Each entry lists the four arguments
+// handed to ClipOrSplitMessage and the parts it is expected to return.
+var clippingOrSplittingTestCases = map[string]struct {
+ // inputText is passed as the text argument.
+ inputText string
+ // clipSplitLength is passed as the length argument; the test also uses it as
+ // the upper bound for the byte length of every part.
+ clipSplitLength int
+ // clippingMessage is passed through unchanged.
+ clippingMessage string
+ // splitMax is passed as the maximum number of parts.
+ splitMax int
+ // expectedOutput is the exact slice the call must return.
+ expectedOutput []string
+}{
+ // Input shorter than the limit: returned as a single, unmodified part.
+ "Short single-line message, split 3": {
+ inputText: "short",
+ clipSplitLength: 20,
+ clippingMessage: "?!?!",
+ splitMax: 3,
+ expectedOutput: []string{"short"},
+ },
+ "Short single-line message, split 1": {
+ inputText: "short",
+ clipSplitLength: 20,
+ clippingMessage: "?!?!",
+ splitMax: 1,
+ expectedOutput: []string{"short"},
+ },
+ "Short single-line message, split 0": {
+ // Mainly check that we don't crash.
+ inputText: "short",
+ clipSplitLength: 20,
+ clippingMessage: "?!?!",
+ splitMax: 0,
+ expectedOutput: []string{"short"},
+ },
+ // splitMax is larger than the number of parts needed, so nothing is clipped.
+ "Long single-line message, noclip": {
+ inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+ clipSplitLength: 50,
+ clippingMessage: "?!?!",
+ splitMax: 10,
+ expectedOutput: []string{
+ "Lorem ipsum dolor sit amet, consectetur adipiscing",
+ " elit, sed do eiusmod tempor incididunt ut labore ",
+ "et dolore magna aliqua.",
+ },
+ },
+ // splitMax equals the number of parts needed, so still nothing is clipped.
+ "Long single-line message, noclip tight": {
+ inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+ clipSplitLength: 50,
+ clippingMessage: "?!?!",
+ splitMax: 3,
+ expectedOutput: []string{
+ "Lorem ipsum dolor sit amet, consectetur adipiscing",
+ " elit, sed do eiusmod tempor incididunt ut labore ",
+ "et dolore magna aliqua.",
+ },
+ },
+ // Only two parts are allowed: the second one is expected to end in the custom clipping message.
+ "Long single-line message, clip custom": {
+ inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+ clipSplitLength: 50,
+ clippingMessage: "?!?!",
+ splitMax: 2,
+ expectedOutput: []string{
+ "Lorem ipsum dolor sit amet, consectetur adipiscing",
+ " elit, sed do eiusmod tempor incididunt ut lab?!?!",
+ },
+ },
+ // Empty clippingMessage: the second part is expected to end in " <clipped message>".
+ "Long single-line message, clip built-in": {
+ inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+ clipSplitLength: 50,
+ clippingMessage: "",
+ splitMax: 2,
+ expectedOutput: []string{
+ "Lorem ipsum dolor sit amet, consectetur adipiscing",
+ " elit, sed do eiusmod tempor inc <clipped message>",
+ },
+ },
+ // Embedded newlines are expected to come back untouched in a single part.
+ "Short multi-line message": {
+ inputText: "I\ncan't\nget\nno\nsatisfaction!",
+ clipSplitLength: 50,
+ clippingMessage: "",
+ splitMax: 2,
+ expectedOutput: []string{"I\ncan't\nget\nno\nsatisfaction!"},
+ },
+ // Multi-byte text: expected parts may be shorter than clipSplitLength.
+ "Long message containing UTF-8 multi-byte runes": {
+ inputText: "人人生而自由,在尊嚴和權利上一律平等。 他們都具有理性和良知,應該以兄弟情誼的精神對待彼此。",
+ clipSplitLength: 50,
+ clippingMessage: "",
+ splitMax: 10,
+ expectedOutput: []string{
+ "人人生而自由,在尊嚴和權利上一律", // Note: only 48 bytes!
+ "平等。 他們都具有理性和良知,應該", // Note: only 49 bytes!
+ "以兄弟情誼的精神對待彼此。",
+ },
+ },
+}
+
+// TestClipOrSplitMessage runs every entry of clippingOrSplittingTestCases
+// through ClipOrSplitMessage as its own subtest. It checks that the returned
+// parts equal the expected ones and that no returned part is longer, in bytes,
+// than the requested length.
+func TestClipOrSplitMessage(t *testing.T) {
+	for testname, testcase := range clippingOrSplittingTestCases {
+		t.Run(testname, func(t *testing.T) {
+			actualOutput := ClipOrSplitMessage(testcase.inputText, testcase.clipSplitLength, testcase.clippingMessage, testcase.splitMax)
+			assert.Equalf(t, testcase.expectedOutput, actualOutput, "'%s' testcase should give expected lines with clipping+splitting.", testname)
+			// Measure what the function actually produced, not the hand-written
+			// expectation, so the length limit is verified even if the comparison
+			// above fails.
+			for _, splitLine := range actualOutput {
+				byteLength := len(splitLine)
+				assert.Truef(t, byteLength <= testcase.clipSplitLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testname, testcase.clipSplitLength, byteLength)
+			}
+		})
+	}
+}
diff --git a/matterbridge.toml.sample b/matterbridge.toml.sample
index 5932b26..a3c471f 100644
--- a/matterbridge.toml.sample
+++ b/matterbridge.toml.sample
@@ -92510 +92517 @@ ShowTopicChange=false
# Supported from the following bridges: slack
SyncTopic=false
-#Message to show when a message is too big
-#Default "<clipped message>"
+# Message to show when a message is too big
+# Default "<clipped message>"
MessageClipped="<clipped message>"
+# Before clipping, try to split messages into at most this many parts. 0 is treated like 1.
+# Be careful with large numbers, as this might cause flooding.
+# Example: A maximum telegram message of 4096 bytes is received. This requires 3 Discord
+# messages (each capped at a hardcoded 1950 bytes).
+# Default 1
+MessageSplitMaxCount=3
+
###################################################################
#telegram section
###################################################################